Setup

Set main seed

So it’s same for all instances

main.seed = 1028

Load libraries

UsingR version 4.0.5 (2021-03-31).

suppressPackageStartupMessages({
  library(vegan)
  library(tidyverse)
  library(stats)
  library(phyloseq)
  library(mefa)
  library(qiime2R)
  library(microbiome)
  library(fantaxtic)
  library(pals)
  library(stringr)
  library(car)
  library(rstatix)
  library(ggpubr)
  library(corrplot)
  library(readxl)
  library(psych)
  library(sjPlot)
  library(RVAideMemoire)
  library(purrr)
  library(olsrr)
  library(ggformula)
  library(reshape2)
  library(mosaic)
  library(micropower)
  library(kableExtra)
  library(gridExtra) #for scatterplots
  library(summarytools) #for chi-square test and tables
  library(pairwiseAdonis)
  source("../helper_functions/omega_sq_adonis.R") #load function to calculate omega squared for adonis objects
  library(Maaslin2)
  library(ppcor) #for eta-squared calculations 
})

Load data

Note: Filepaths not shown

Data Wrangling

Create phyloseq object

physeq_M2448 <- qza_to_phyloseq(
  features=otu_table_path,
  tree=tree_path,
  taxonomy=taxonomy_path,
  metadata=metadata_path
)

#create a phyloseq object containing only 24M samples
physeq_M24 <- subset_samples(physeq_M2448, Timepoint_months=="24")

Wrangle data for alpha diversity analyses

#read in continuous stress data
M24_stress <-
  read_csv(m24_stress_data)

M48_stress <-
  read_csv(m48_stress_data)

leq_resp <-
  read_csv(leq_resp_file) %>% 
  dplyr::select(
    subjid,
    person_filling_leq = person_filling_Y04
  )

#bind these datasets together
stress_data <-
  M24_stress %>% 
  bind_rows(M48_stress) %>% 
  left_join(leq_resp, by = "subjid")

#read in alpha diversity data from qiime2
shannon <-
  read_tsv(shannon_file) %>% 
  dplyr::rename('#SampleID' = ...1)

faith <-
  read_tsv(faith_file) %>% 
  dplyr::rename('#SampleID' = ...1)

evenness <-
  read_tsv(evenness_file) %>% 
  dplyr::rename('#SampleID' = ...1)

richness <-
  read_tsv(richness_file) %>% 
  dplyr::rename('#SampleID' = ...1)

#create dataset with alpha diversity metrics & stress
alpha_diversity_stress <-
  shannon %>% 
  left_join(faith, by = "#SampleID") %>% 
  left_join(evenness, by = "#SampleID") %>% 
  left_join(richness, by = "#SampleID") %>% 
  left_join(stress_data, by = "#SampleID")

#read in child sex data
sex_data <- readxl::read_xlsx(sex_file) %>% 
  dplyr::rename(subjid=SubjectID)

#read in other covariates
bf_data <- read_csv(bf_file) %>% 
  full_join(read_csv(delivery_file), by = "subjid") %>% 
  full_join(read_csv(income_file), by = "subjid") %>% 
  full_join(read_csv(age_file), by = "subjid")

gest_age <- readxl::read_xlsx(gestage_file) %>% 
  dplyr::select(
    subjid = SubjectID,
    GA
  )

biotics <- read_csv(biotics_file)

#read in sequencing batch
meta <- read_tsv(metadata_path)
meta <- meta[-c(1), ] #remove #q2:types header
meta <- 
  meta %>% 
  dplyr::select(
    '#SampleID', 
    Sequencing_batch
    )

#bind covariate data to rest
alpha_diversity_stress <-
  alpha_diversity_stress %>% 
  left_join(sex_data, by = "subjid") %>% 
  left_join(bf_data, by = "subjid") %>% 
  left_join(meta, by = "#SampleID") %>% 
  left_join(biotics, by = "subjid") %>% 
  left_join(gest_age, by = "subjid")

#read in cbcl 
child_beh4 <- read_csv(beh4_file) %>% 
  dplyr::select(
    subjid,
    age_y4_cbcl = age_y4,
    cbcl_respondent_y4,
    cbcl_gi_y4,
    contains("tot_")
  ) 

child_beh2 <-
  read_csv(beh2_file) %>% 
  dplyr::select(
    subjid,
    age_m24_cbcl = age_m24,
    cbcl_respondent_m24,
    cbcl_gi_m24,
    contains("tot_")
  )

#bind cbcl data to rest
alpha_diversity_stress <-
  alpha_diversity_stress %>% 
  left_join(child_beh4, by = "subjid") %>% 
  left_join(child_beh2, by = "subjid")

#create categorical postnatal stress var because LEQ has too few scale points
alpha_diversity_stress <- 
  alpha_diversity_stress %>% 
  mutate(
    postnatal_stress = case_when(
      timepoint == 48 & child_neg_events_sum > 0 ~ 1,
      timepoint == 48 & child_neg_events_sum == 0 ~ 0,
      timepoint == 24 & leq_b2_events_total_Y04 > 0 ~ 1,
      timepoint == 24 & leq_b2_events_total_Y04 == 0 ~ 0
    )
  ) %>% 
  mutate(postnatal_stress = as.factor(postnatal_stress))

#remove all unused variables
alpha_diversity_stress <- alpha_diversity_stress %>% 
  dplyr::select(
    -contains("neg"),
    -contains("leq"),
    -(contains("ctq")& !contains("total")),
    -maltreated_M54,
    -contains("days"),
    -c_age_years_stoolproxy_y4,
    -person_filling_M54
  )

#create dataset of cases that have usable microbiome data and reported on at least 1 timepoint of adversity exposure to be used in analyses (N=450)
alpha_diversity_stress_usable <-
  alpha_diversity_stress %>% 
  filter(timepoint==24 & !(is.na(STAI_prorated_state_pw26) & is.na(ctq_total_score_M54) & is.na(postnatal_stress)))

#create categorical prenatal adversity variable
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>% 
  mutate(stai_above_clin_cutoff = ifelse(STAI_prorated_state_pw26 > 40, 1, 0))

#create standardized cbcl scores with mean=100, sd=15 within each sex (scores are named cbcl{subscale}tot_{timepoint}_std) and then remove unstandardized variables
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  group_by(sex_binary) %>% 
  dplyr::mutate(across(c(contains("tot_"), contains("gi")), .fns = list(std = ~as.vector(scale(.))*15 + 100), .names="{col}_{fn}")) %>% 
  ungroup() %>% 
  dplyr::select(
    -(contains("tot_") & !contains("std")),
    -(contains("gi") & !contains("std"))
  )

Note on decision to standardize cbcl raw scores within each sex: ASEBA Y1.5-5 manual recommends using raw scores but standardizing by within your sample by sex (pg 122). Their suggested mean is 100, sd is 15.

Create cumulative adversity variables

#create sum of timepoints with "substantial" adversity exposure (range = 0 (no timempoints with substantial adversity exposure) to 3 (substantial adversity exposure at all timepoints))
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>% 
  dplyr::mutate(
    sum_advrsty = as.numeric(as.character(maltreated_mod_M54)) + stai_above_clin_cutoff + as.numeric(as.character(postnatal_stress))
  )

#determine how many dyads have 0, 1, 2, and 3 timepoints of exposure
alpha_diversity_stress_usable %>% ggplot(aes(x=sum_advrsty, fill=as.factor(sum_advrsty))) +
  geom_histogram(binwidth = 1) +
  stat_bin(aes(y=..count.., label=ifelse(..count..==0,"",..count..)), geom="text", vjust=-.5) +
  scale_fill_discrete("Timepoints with Adversity Exposure") +
  xlab("Number of Timepoints with Adverity Exposure") +
  ylab("Number of Participants") 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#create the collapsed grouping of 3 cumulative adversity levels (because very few dyads had sum_advrsty=3)
alpha_diversity_stress_usable <- alpha_diversity_stress_usable %>% 
  mutate(
    cumulative_adv = case_when(
      sum_advrsty == 0 ~ 0,
      sum_advrsty == 1 ~ 1,
      sum_advrsty == 2 | sum_advrsty == 3 ~ 2
    )
  ) 

#convert categorical binary & multicategorical variables to factor type (for analyses that require it later)
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  dplyr::mutate(across(c(cumulative_adv, sex_binary, deliv_mode, postnatal_stress, Sequencing_batch, maltreated_mod_M54, antibio_M24, probiotics_M24), ~as.factor(.x)))

#write alpha diversity analyses dataset to file
write_csv(alpha_diversity_stress_usable, "../../data/analysis_datasets/fran/alpha_div_stress_usable.csv")
#also write these data to form B data folder for submission with GUSTO form B application
write_csv(alpha_diversity_stress_usable, "../../manuscripts/fran_adversity_microbiome/Form_B_submission/data_updated/alpha_diversity_and_metadata.csv")

Check whether there are differences in selected socioemotional functioning domains by child sex, family income, and ethnicity

Socioemotional Functioning at Age 2 Years

#get relevant variables
cbcl_covs <- alpha_diversity_stress_usable %>% 
  dplyr::select(
    monthly_income_per_member,
    GA,
    cbclintprobtot_m24,
    cbcltotprobtot_m24,
    cbcldevprobtot_m24,
    age_m24_cbcl
  )

#correlations with cbcl and continuous covs
corPlot(cbcl_covs,
        numbers = TRUE,
        stars = TRUE,
        show.legend = TRUE,
        cex.axis = 0.8,
        cex = 0.5) 
#no associations between monthly income and cbcl outcome vars

#associations with categorical covs
t.test(cbclintprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcltotprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcldevprobtot_m24~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns

summary(aov(cbclintprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcltotprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcldevprobtot_m24~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns

No domains differ

Socioemotional Functioning at Age 4 Years

#get relevant variables
cbcl_covs_y4 <- alpha_diversity_stress_usable %>% 
  dplyr::select(
    monthly_income_per_member,
    GA,
    cbclintprobtot_y4,
    cbcltotprobtot_y4,
    cbclextprobtot_y4,
    cbclattprobtot_y4,
    cbclsleepprobtot_y4,
    cbcldevprobtot_y4,
    cbcladhdprobtot_y4,
    age_y4_cbcl
  )

#correlations with cbcl domains and continuous covariates
corPlot(cbcl_covs_y4,
        numbers = TRUE,
        stars = TRUE,
        show.legend = TRUE,
        cex.axis = 0.8,
        cex = 0.5) 
#no differences

#associations with categorical covs
t.test(cbclintprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcltotprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbclextprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig, males higher than females
t.test(cbclattprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig, males higher than females
t.test(cbclsleepprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcldevprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(cbcladhdprobtot_y4~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns


summary(aov(cbclintprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcltotprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclextprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclattprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbclsleepprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcldevprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(cbcladhdprobtot_y4~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns

Externalizing and attention problems at year 4 differ by sex (higher in male children on average) – further validating the decision to standardize socioemotional functioning variables within each sex (performed in data wrangling above).

Check that Sequencing Batch is not associated with other variables of interest

Test association between sequencing batch and adversity variables, and numbers in each batch

#t-test with sequencing batch and preconception, prenatal adversity
t.test(ctq_total_score_M54~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(STAI_prorated_state_pw26~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns

#chi-square test with sequencing batch and postnatal stress, adversity grouping variables
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$postnatal_stress) #ns
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$maltreated_mod_M54) #ns
chisq.test(alpha_diversity_stress_usable$Sequencing_batch, alpha_diversity_stress_usable$stai_above_clin_cutoff) #ns

#count the number of each group in the sequencing batches
alpha_diversity_stress_usable %>%
  filter(!is.na(postnatal_stress)) %>% 
  group_by(Sequencing_batch, postnatal_stress) %>%
  summarise(cnt = n()) %>%
  mutate(freq = round(cnt / sum(cnt), 3)) %>% 
  arrange(desc(freq)) #22% yes in batch1 (of non-nas), 25% in batch2

alpha_diversity_stress_usable %>%
  filter(!is.na(maltreated_mod_M54)) %>% 
  group_by(Sequencing_batch, maltreated_mod_M54) %>%
  summarise(cnt = n()) %>%
  mutate(freq = round(cnt / sum(cnt), 3)) %>% 
  arrange(desc(freq)) #42% yes in batch1 (of non-nas), 46% in batch2

alpha_diversity_stress_usable %>%
  filter(!is.na(stai_above_clin_cutoff)) %>% 
  group_by(Sequencing_batch, stai_above_clin_cutoff) %>%
  summarise(cnt = n()) %>%
  mutate(freq = round(cnt / sum(cnt), 3)) %>% 
  arrange(desc(freq)) #22% in batch 1 for each group

Conclusion: adversity grouping variable prevalences do not differ by sequencing batch

Initial Data Visualizations

Barplots

# get top 25 most abundant taxa
physeq_ttaxa <- get_top_taxa(physeq_obj = physeq_M24,
                             n = 25,
                             relative = TRUE,
                             discard_other = TRUE)

#group by postnatal adversity
physeq_ttaxa_leq <- merge_samples(physeq_ttaxa, "postnatal_stress")

#create 'genus species' variable
tmp_leq <- name_taxa(physeq_ttaxa_leq, species = TRUE)

#barplot based on postnatal adversity group (NAs are removed)
fantaxtic_bar(tmp_leq, color_by = "Species", label_by = "Species") +
  xlab("Postnatal Adversity") +
  ylab("Relative Abundance") +
  scale_fill_manual(values=as.vector(glasbey(32)))
##                                                Level N.color.shades
## 1              Faecalibacterium uncultured bacterium              1
## 2   Clostridium sensu stricto 1 uncultured bacterium              1
## 3                Enterococcus Enterococcus devriesei              1
## 4                Staphylococcus uncultured bacterium              1
## 5                 Streptococcus uncultured bacterium              2
## 6        Erysipelatoclostridium uncultured bacterium              2
## 7     Collinsella Collinsella aerofaciens ATCC 25986              1
## 8  Bifidobacterium Bifidobacterium pseudocatenulatum              1
## 9          Escherichia-Shigella uncultured bacterium              1
## 10                  Cronobacter uncultured bacterium              1
## 11                  Akkermansia uncultured bacterium              1
## 12                  Bacteroides uncultured bacterium              1
## 13                      Blautia uncultured bacterium              4
## 14  Fusicatenibacter uncultured Firmicutes bacterium              1
## 15   [Eubacterium] hallii group uncultured bacterium              1
## 16              Intestinibacter uncultured bacterium              2
## 17                 Anaerostipes uncultured bacterium              1
## 18 Lachnoclostridium uncultured Firmicutes bacterium              1
## 19                 Tyzzerella 4 uncultured bacterium              1
##    Central.color
## 1        #6495ED
## 2        #EDD264
## 3        #646AED
## 4        #DDED64
## 5        #8A64ED
## 6        #B2ED64
## 7        #B564ED
## 8        #87ED64
## 9        #E064ED
## 10       #64ED6D
## 11       #ED64CF
## 12       #64ED98
## 13       #ED64A3
## 14       #64EDC3
## 15       #ED6478
## 16       #64ECED
## 17       #ED7B64
## 18       #64C0ED
## 19       #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

#group by prenatal adversity group (above STAI-S cutoff for clinical levels of anxiety)
physeq_ttaxa_merged_stai <- merge_samples(physeq_ttaxa, "stai_state_above_clin_cutoff_pw26")
tmp_stai <- name_taxa(physeq_ttaxa_merged_stai, species = TRUE)
  
#barplot based on prenatal adversity (NAs are removed)
fantaxtic_bar(tmp_stai, color_by = "Species", label_by = "Species") +
  xlab("Prenatal Adversity") +
  ylab("Relative Abundance") +
  scale_fill_manual(values=as.vector(glasbey(32)))
##                                                Level N.color.shades
## 1              Faecalibacterium uncultured bacterium              1
## 2   Clostridium sensu stricto 1 uncultured bacterium              1
## 3                Enterococcus Enterococcus devriesei              1
## 4                Staphylococcus uncultured bacterium              1
## 5                 Streptococcus uncultured bacterium              2
## 6        Erysipelatoclostridium uncultured bacterium              2
## 7     Collinsella Collinsella aerofaciens ATCC 25986              1
## 8  Bifidobacterium Bifidobacterium pseudocatenulatum              1
## 9          Escherichia-Shigella uncultured bacterium              1
## 10                  Cronobacter uncultured bacterium              1
## 11                  Akkermansia uncultured bacterium              1
## 12                  Bacteroides uncultured bacterium              1
## 13                      Blautia uncultured bacterium              4
## 14  Fusicatenibacter uncultured Firmicutes bacterium              1
## 15   [Eubacterium] hallii group uncultured bacterium              1
## 16              Intestinibacter uncultured bacterium              2
## 17                 Anaerostipes uncultured bacterium              1
## 18 Lachnoclostridium uncultured Firmicutes bacterium              1
## 19                 Tyzzerella 4 uncultured bacterium              1
##    Central.color
## 1        #6495ED
## 2        #EDD264
## 3        #646AED
## 4        #DDED64
## 5        #8A64ED
## 6        #B2ED64
## 7        #B564ED
## 8        #87ED64
## 9        #E064ED
## 10       #64ED6D
## 11       #ED64CF
## 12       #64ED98
## 13       #ED64A3
## 14       #64EDC3
## 15       #ED6478
## 16       #64ECED
## 17       #ED7B64
## 18       #64C0ED
## 19       #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

#group by preconception adversity group ('moderate to severe' maltreatment in at least 1 category)
physeq_ttaxa_merged_ctq <- merge_samples(physeq_ttaxa, "maltreated_mod_M54")
tmp_ctq <- name_taxa(physeq_ttaxa_merged_ctq, species = TRUE)

#barplot based on preconception adversity
fantaxtic_bar(tmp_ctq, color_by = "Species", label_by = "Species") +
  xlab("Preconception Adversity") +
  ylab("Relative Abundance") +
  scale_fill_manual(values=as.vector(glasbey(32)))
##                                                Level N.color.shades
## 1              Faecalibacterium uncultured bacterium              1
## 2   Clostridium sensu stricto 1 uncultured bacterium              1
## 3                Enterococcus Enterococcus devriesei              1
## 4                Staphylococcus uncultured bacterium              1
## 5                 Streptococcus uncultured bacterium              2
## 6        Erysipelatoclostridium uncultured bacterium              2
## 7     Collinsella Collinsella aerofaciens ATCC 25986              1
## 8  Bifidobacterium Bifidobacterium pseudocatenulatum              1
## 9          Escherichia-Shigella uncultured bacterium              1
## 10                  Cronobacter uncultured bacterium              1
## 11                  Akkermansia uncultured bacterium              1
## 12                  Bacteroides uncultured bacterium              1
## 13                      Blautia uncultured bacterium              4
## 14  Fusicatenibacter uncultured Firmicutes bacterium              1
## 15   [Eubacterium] hallii group uncultured bacterium              1
## 16              Intestinibacter uncultured bacterium              2
## 17                 Anaerostipes uncultured bacterium              1
## 18 Lachnoclostridium uncultured Firmicutes bacterium              1
## 19                 Tyzzerella 4 uncultured bacterium              1
##    Central.color
## 1        #6495ED
## 2        #EDD264
## 3        #646AED
## 4        #DDED64
## 5        #8A64ED
## 6        #B2ED64
## 7        #B564ED
## 8        #87ED64
## 9        #E064ED
## 10       #64ED6D
## 11       #ED64CF
## 12       #64ED98
## 13       #ED64A3
## 14       #64EDC3
## 15       #ED6478
## 16       #64ECED
## 17       #ED7B64
## 18       #64C0ED
## 19       #EDA664
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.

Distributions

# for LEQ, who was the respondent?
alpha_diversity_stress_usable %>% 
  group_by(any_bf_months) %>% 
  summarise(n())
## # A tibble: 6 × 2
##   any_bf_months `n()`
##   <chr>         <int>
## 1 <1M              82
## 2 >12M            104
## 3 1M_to_3M         72
## 4 3M_to_6M         80
## 5 6M_to_12M       100
## 6 <NA>             12
#preconception adversity
alpha_diversity_stress_usable %>% 
  filter(timepoint==24) %>% 
  ggplot(aes(ctq_total_score_M54)) +
  geom_histogram(binwidth = 2) +
  xlab("Preconception Adversity Score")

#prenatal adversity
alpha_diversity_stress_usable %>% 
  filter(timepoint==24) %>% 
  ggplot(aes(STAI_prorated_state_pw26)) +
  geom_histogram(binwidth = 2) +
  geom_vline(aes(xintercept = 40, colour="cutoff")) +
  scale_color_manual(name = "Clinical Cutoff", values = c("cutoff" = "red"))

#postnatal adversity
alpha_diversity_stress_usable %>% 
  filter(timepoint==24 & !is.na(postnatal_stress)) %>% 
  ggplot(aes(postnatal_stress)) +
  geom_bar() +
  xlab("Postnatal Adversity")

#Alpha diversity -- shannon index
alpha_diversity_stress %>% 
  filter(timepoint==24) %>% 
  ggplot(aes(shannon_entropy)) +
  geom_histogram(binwidth = 0.25) +
  xlab("Shannon Index")

#alpha diveristy -- evenness
alpha_diversity_stress %>% 
  filter(timepoint==24) %>% 
  ggplot(aes(pielou_evenness)) +
  geom_histogram(binwidth = 0.05)

#alpha diversity -- faith pd
alpha_diversity_stress_usable%>% 
  filter(timepoint==24) %>% 
  ggplot(aes(faith_pd)) +
  geom_histogram(binwidth = 0.5) 

#alpha diversity -- observed features
alpha_diversity_stress_usable%>% 
  filter(timepoint==24) %>% 
  ggplot(aes(observed_features)) +
  geom_histogram(binwidth = 2)

#cbcl total problems age 2 years 
alpha_diversity_stress_usable %>%
  ggplot(aes(cbcltotprobtot_m24_std)) +
  geom_histogram(binwidth = 5) 

#cbcl sleep problems at age 4 years
alpha_diversity_stress_usable %>%
  ggplot(aes(cbclsleepprobtot_y4_std)) +
  geom_histogram(binwidth = 5) 

Descriptive statistics

#Ns
#how many are of each ethnicity?
alpha_diversity_stress_usable %>% 
  dplyr::count(mother_ethnicity)
#each sex?
alpha_diversity_stress_usable %>% 
  dplyr::count(sex_binary) #0 = female, 1 = male
#each delivery mode?
alpha_diversity_stress_usable %>% 
  dplyr::count(deliv_mode_str)
#in each postnatal adveristy group?
alpha_diversity_stress_usable %>% 
  dplyr::count(postnatal_stress)
#in each preconception adversity group?
alpha_diversity_stress_usable %>% 
  dplyr::count(maltreated_mod_M54 == 1)
#in each prenatal adveristy group?
alpha_diversity_stress_usable %>% 
  dplyr::count(STAI_prorated_state_pw26 > 40)
#each antibiotic use group? (yes/no)
alpha_diversity_stress_usable %>% 
  dplyr::count(antibio_M24 ==1)
#each probiotic use group? (yes/no)
alpha_diversity_stress_usable %>% 
  dplyr::count(probiotics_M24 ==1)
#each sequencing batch?
alpha_diversity_stress_usable %>% 
  dplyr::count(Sequencing_batch =="batch1")

#who is the respondent for LEQ?
alpha_diversity_stress_usable %>% 
  filter(!is.na(leq_b2_events_total_Y04)) %>%
  dplyr::count(person_filling_Y04)

#who is the respondent for CBCL year 2?
alpha_diversity_stress_usable %>% 
  filter(!is.na(cbcl_respondent_m24)) %>%
  dplyr::count(cbcl_respondent_m24)

#who is the respondent for CBCL year 4?
alpha_diversity_stress_usable %>% 
  filter(!is.na(cbcl_respondent_y4)) %>%
  dplyr::count(cbcl_respondent_y4)

#how many for each breastfeeding category? 
table(alpha_diversity_stress_usable$only_bf_months)
table(alpha_diversity_stress_usable$any_bf_months)

#mean, sd, range (continuous covariates, adversities, alpha diversity metrics)
favstats(~monthly_income_per_member, data=alpha_diversity_stress_usable)
favstats(~leq_b2_events_total_Y04, data=alpha_diversity_stress_usable)
favstats(~STAI_prorated_state_pw26, data=alpha_diversity_stress_usable)
favstats(~ctq_total_score_M54, data=alpha_diversity_stress_usable)
favstats(~GA, data=alpha_diversity_stress_usable)
favstats(~observed_features, data=alpha_diversity_stress_usable)
favstats(~faith_pd, data=alpha_diversity_stress_usable)
favstats(~shannon_entropy, data=alpha_diversity_stress_usable)
favstats(~pielou_evenness, data=alpha_diversity_stress_usable)

#mean, sd, range (cbcl variables)
favstats(~cbcldevprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbclintprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbclsleepprobtscore_m24, data=alpha_diversity_stress_usable)
favstats(~cbcltotprobtot_m24_std, data=alpha_diversity_stress_usable)
favstats(~cbcladhdprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclattprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbcldevprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclextprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclintprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbclsleepprobtot_y4_std, data=alpha_diversity_stress_usable)
favstats(~cbcltotprobtot_y4_std, data=alpha_diversity_stress_usable)

#how many complete on year 2 cbcl? (all participants that have 1 subscale, have all of them)
alpha_diversity_stress_usable %>% dplyr::count(!is.na(cbclintprobtot_m24_std)) #e.g., 178 with complete for internalizing problems y2
alpha_diversity_stress_usable %>% dplyr::count(!is.na(cbclintprobtot_y4_std)) #e.g., 300 with complete for internalizing problems y4

#mean, sd, range (age variables)
favstats(~c_age_years_stoolproxy_m24, data=alpha_diversity_stress_usable)
favstats(~age_y4_cbcl, data=alpha_diversity_stress_usable)
favstats(~age_m24_cbcl, data=alpha_diversity_stress_usable)

#how many complete on age variables?
alpha_diversity_stress_usable %>% dplyr::count(!is.na(age_m24_cbcl)) #only 145 with complete data on age at Y2
alpha_diversity_stress_usable %>% dplyr::count(!is.na(age_y4_cbcl)) #only 299 with complete data on age at Y4

#how many with complete data on all adversities?
alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=205

Adversity Effects on the Gut Microbiome

Alpha Diversity

Covariate Selection: Bivariate Correlations with Continuous Covariates

#create df with just continuous variables for correlation matrix
alpha_div_stress_cor <-
  alpha_diversity_stress_usable %>% 
  dplyr::select(
    shannon_entropy,
    faith_pd,
    pielou_evenness,
    observed_features,
    STAI_prorated_state_pw26,
    ctq_total_score_M54,
    monthly_income_per_member,
    GA,
    c_age_years_stoolproxy_m24
  )

#generate correlations
cor(alpha_div_stress_cor, use="pairwise.complete.obs")
##                            shannon_entropy      faith_pd pielou_evenness
## shannon_entropy               1.0000000000  0.4183426493     0.971758951
## faith_pd                      0.4183426493  1.0000000000     0.315802288
## pielou_evenness               0.9717589514  0.3158022878     1.000000000
## observed_features             0.7183060298  0.5687247721     0.537840793
## STAI_prorated_state_pw26      0.0047040890 -0.0510138731     0.003943694
## ctq_total_score_M54           0.0004104005 -0.0474815685     0.001324786
## monthly_income_per_member    -0.0207373915 -0.0651941518     0.013721329
## GA                            0.0358812113  0.0003925481     0.056962115
## c_age_years_stoolproxy_m24    0.0566809881  0.0162011701     0.049665400
##                            observed_features STAI_prorated_state_pw26
## shannon_entropy                  0.718306030              0.004704089
## faith_pd                         0.568724772             -0.051013873
## pielou_evenness                  0.537840793              0.003943694
## observed_features                1.000000000              0.002426840
## STAI_prorated_state_pw26         0.002426840              1.000000000
## ctq_total_score_M54              0.006883225              0.176639006
## monthly_income_per_member       -0.118933139             -0.143561284
## GA                              -0.049055254             -0.045515010
## c_age_years_stoolproxy_m24       0.058456510              0.076372335
##                            ctq_total_score_M54 monthly_income_per_member
## shannon_entropy                   0.0004104005               -0.02073739
## faith_pd                         -0.0474815685               -0.06519415
## pielou_evenness                   0.0013247861                0.01372133
## observed_features                 0.0068832251               -0.11893314
## STAI_prorated_state_pw26          0.1766390064               -0.14356128
## ctq_total_score_M54               1.0000000000                0.06496342
## monthly_income_per_member         0.0649634151                1.00000000
## GA                                0.0116413647                0.13389398
## c_age_years_stoolproxy_m24        0.0743657585                0.14566203
##                                       GA c_age_years_stoolproxy_m24
## shannon_entropy             0.0358812113                 0.05668099
## faith_pd                    0.0003925481                 0.01620117
## pielou_evenness             0.0569621151                 0.04966540
## observed_features          -0.0490552535                 0.05845651
## STAI_prorated_state_pw26   -0.0455150099                 0.07637233
## ctq_total_score_M54         0.0116413647                 0.07436576
## monthly_income_per_member   0.1338939839                 0.14566203
## GA                          1.0000000000                -0.15288394
## c_age_years_stoolproxy_m24 -0.1528839353                 1.00000000
cor.test(alpha_diversity_stress_usable$observed_features, alpha_diversity_stress_usable$monthly_income_per_member)
## 
##  Pearson's product-moment correlation
## 
## data:  x and y
## t = -2.4372, df = 414, p-value = 0.01522
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.21264715 -0.02305111
## sample estimates:
##        cor 
## -0.1189331
#visualize correlations
corPlot(alpha_div_stress_cor,
        numbers=TRUE,
        main = "",
        stars=TRUE,
        labels = c("Shannon index", "Faith PD", "Pielou's Evennes", "Observed Features", "Prenatal Adversity", "Preconception Adversity", "Income", "Gestational Age", "Child Age at Stool Collection"),
        show.legend=TRUE,
        diag=FALSE,
        upper=FALSE,
        scale = TRUE,
        xlas=2,
#        gr=gr,
        cex.axis = 0.8,
        cex = 0.5)

Significant continuous covariates: monthly income

Covariate Selection: T-tests & ANOVAs with Categorical Covariates

Potential covariates: sex_binary, delivery mode, breastfeeding vars, ethnicity, antibiotics, probiotics, sequencing batch

#alpha diveristy by sex
t.test(shannon_entropy~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #females higher
t.test(pielou_evenness~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~sex_binary, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #females higher

#differences in alpha div by delivery mode 
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(deliv_mode_str = as.factor(deliv_mode_str)) #convert categorical var to factor

t.test(shannon_entropy~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #vaginally born higher
t.test(pielou_evenness~deliv_mode_str, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns 
t.test(observed_features~deliv_mode_str, data = alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns


#difference in alpha div by duration of exclusive breastfeeding (one-way ANOVA)
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(only_bf_months = as.factor(only_bf_months)) #convert categorical var to factor

summary(aov(shannon_entropy~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~only_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(observed_features~only_bf_months, data=alpha_diversity_stress_usable)) #ns

#difference in alpha diversity by duration of any breastfeeding (one-way ANOVA)
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(any_bf_months = as.factor(any_bf_months)) #convert categorical var to factor

summary(aov(shannon_entropy~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~any_bf_months, data=alpha_diversity_stress_usable)) #ns
summary(aov(observed_features~any_bf_months, data=alpha_diversity_stress_usable)) #ns

#difference in alpha diversity by ethnicity (one-way ANOVA)
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(mother_ethnicity = as.factor(mother_ethnicity)) #convert categorical var to factor

summary(aov(shannon_entropy~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(faith_pd~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns
summary(aov(pielou_evenness~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns 
summary(aov(observed_features~mother_ethnicity, data=alpha_diversity_stress_usable)) #ns

#difference in alpha diversity by antibiotics
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(antibio_M24 = as.factor(antibio_M24))

t.test(shannon_entropy~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~antibio_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns

#difference in alpha diversity by probiotics
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(probiotics_M24 = as.factor(probiotics_M24))

t.test(shannon_entropy~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(faith_pd~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(observed_features~probiotics_M24, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns

#difference in alpha diversity by sequencing batch
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(Sequencing_batch = as.factor(Sequencing_batch))

t.test(shannon_entropy~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig (batch 1 higher)
t.test(faith_pd~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns
t.test(pielou_evenness~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #sig (batch 1 higher)
t.test(observed_features~Sequencing_batch, data=alpha_diversity_stress_usable, alternative="two.sided", paired=FALSE) #ns

Significant categorical covariates: delivery mode, sex, sequencing batch

Regression Prep: dummy code multicategorical variables

Non-binary categorical covariates: ethnicity (3 levels), duration of exclusive breastfeeding (4 levels)
Dummy coding means each group is compared to the reference group

#dummy code ethnicity
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
                       eth_f1 = (mother_ethnicity == "malay"),
                       eth_f2 = (mother_ethnicity == "indian"))
#factor 1 = Malay relative to Chinese
#factor 2 = Indian relative to Chinese 
#Chinese is reference group

#dummy code duration of exclusive breastfeeding
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
                       bf_f1 = (only_bf_months == "1M_to_3M"),
                       bf_f2 = (only_bf_months == "3M_to_6M"),
                       bf_f3 = (only_bf_months == "6M_to_12M"))
#less than 1 month is reference group
#factor 1 = 1 to 3 months relative to 1 month
#factor 2 = 3 to 6 months relative to 1 month
#factor 3 = 6 to 12 months relative to 1 month

#dummy code cumulative adversity in comparison to no exposure group
alpha_diversity_stress_usable <- base::transform(alpha_diversity_stress_usable,
                       cumulative_0v1 = (cumulative_adv == "1"),
                       cumulative_0v2 = (cumulative_adv == "2"))
#0 (no adversity exposure) is the reference group
#factor 1 = no exposure relative to 1 timepoint
#factor 2 = no exposure relative to 2+ timepoints

Regression Diagnostics

Note: We do not need to check normality of residuals. Since the number of observations per variable is > 10 in all models, violations of normality will not impact results Schmidt & Finan, 2018

#define base models -- no covariates
base_mod_precon <- lm(shannon_entropy~ctq_total_score_M54, data = alpha_diversity_stress_usable, na.action = na.exclude) #na.exclude allows for residuals to have NAs added so that they can be joined to df which includes more than complete cases
alpha_diversity_stress_usable$pred_base_mod_precon <- predict(base_mod_precon)
alpha_diversity_stress_usable$resid_base_mod_precon <- residuals(base_mod_precon)

base_mod_prenat <- lm(shannon_entropy~STAI_prorated_state_pw26, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_base_mod_prenat <- predict(base_mod_prenat)
alpha_diversity_stress_usable$resid_base_mod_prenat <- resid(base_mod_prenat)

base_mod_postnat <- lm(shannon_entropy~postnatal_stress, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_base_mod_postnat <- predict(base_mod_postnat)
alpha_diversity_stress_usable$resid_base_mod_postnat <- resid(base_mod_postnat)

#check homoskadisticity of residuals -- base models
gf_point(resid_base_mod_precon~pred_base_mod_precon, data = alpha_diversity_stress_usable) 
gf_point(resid_base_mod_prenat~pred_base_mod_prenat, data = alpha_diversity_stress_usable) 
gf_point(resid_base_mod_postnat~pred_base_mod_postnat, data = alpha_diversity_stress_usable) 

#outliers
ols_plot_dfbetas(base_mod_precon)
ols_plot_dfbetas(base_mod_prenat)
ols_plot_dfbetas(base_mod_postnat)

#define models with covariates (delivery mode, child sex, sequencing batch, income)
cov1_mod_precon <-lm(shannon_entropy~ctq_total_score_M54+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_precon <- predict(cov1_mod_precon)
alpha_diversity_stress_usable$resid_cov1_mod_precon <- residuals(cov1_mod_precon)

cov1_mod_prenat <-lm(shannon_entropy~STAI_prorated_state_pw26+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_prenat <- predict(cov1_mod_prenat)
alpha_diversity_stress_usable$resid_cov1_mod_prenat <- resid(cov1_mod_prenat)

cov1_mod_postnat <- lm(shannon_entropy~postnatal_stress+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable, na.action = na.exclude)
alpha_diversity_stress_usable$pred_cov1_mod_postnat <- predict(cov1_mod_postnat)
alpha_diversity_stress_usable$resid_cov1_mod_postnat <- resid(cov1_mod_postnat)

#check homoskedasticity of residuals -- cov1 models
gf_point(resid_cov1_mod_precon~pred_cov1_mod_precon, data = alpha_diversity_stress_usable) 
gf_point(resid_cov1_mod_prenat~pred_cov1_mod_prenat, data = alpha_diversity_stress_usable) 
gf_point(resid_cov1_mod_postnat~pred_cov1_mod_postnat, data = alpha_diversity_stress_usable) 

#check linearity with respect to each predictor
gf_point(shannon_entropy~ctq_total_score_M54, data=alpha_diversity_stress_usable)
gf_point(shannon_entropy~STAI_prorated_state_pw26, data=alpha_diversity_stress_usable)
gf_violin(shannon_entropy~postnatal_stress, data=na.omit(alpha_diversity_stress_usable))

Conclusions: homoskadisticity of residuals is violated – will perform regression with HC3 standard errors

All Adversity Exposures

Shannon Index

#define models 
cov1_mod_mult <- lm(shannon_entropy~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult <- lm(shannon_entropy~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE)) 
  shannon entropy
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 3.35 0.25 0.18 0.14 2.86 – 3.85 -0.10 – 0.45 13.44 <0.001
sex binary [1] 0.05 0.09 0.08 0.15 -0.13 – 0.23 -0.21 – 0.37 0.53 0.595
deliv mode [1] -0.13 0.10 -0.20 0.17 -0.33 – 0.08 -0.53 – 0.13 -1.22 0.225
monthly income per member -0.00 0.00 -0.02 0.07 -0.00 – 0.00 -0.16 – 0.13 -0.23 0.816
Sequencing batch [batch2] -0.19 0.11 -0.31 0.18 -0.41 – 0.03 -0.65 – 0.04 -1.74 0.083
ctq total score M54 -0.00 0.01 -0.06 0.10 -0.02 – 0.01 -0.26 – 0.14 -0.59 0.555
STAI prorated state pw26 0.01 0.01 0.09 0.07 -0.00 – 0.02 -0.05 – 0.23 1.27 0.207
postnatal stress [1] -0.15 0.10 -0.24 0.16 -0.35 – 0.05 -0.55 – 0.08 -1.48 0.142
Observations 186
R2 / R2 adjusted 0.053 / 0.016
#knitr::knit_print will print the results (which is automatically generated as a .html, and not printed to console)
knitr::knit_print(tab_model(base_mod_mult, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE)) 
  shannon entropy
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 3.35 0.19 0.06 0.08 2.98 – 3.73 -0.10 – 0.23 17.66 <0.001
ctq total score M54 -0.01 0.01 -0.10 0.08 -0.02 – 0.00 -0.27 – 0.06 -1.22 0.223
STAI prorated state pw26 0.01 0.00 0.08 0.07 -0.00 – 0.01 -0.05 – 0.21 1.20 0.230
postnatal stress [1] -0.15 0.09 -0.24 0.15 -0.33 – 0.03 -0.53 – 0.06 -1.60 0.111
Observations 205
R2 / R2 adjusted 0.026 / 0.011
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_all_cov_complete <- #create complete cases dataset for this analysis
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member) &!is.na(Sequencing_batch)) 

eta_shannon_covs <- with(alpha_div_all_cov_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress,  sex_binary, deliv_mode, monthly_income_per_member, Sequencing_batch, shannon_entropy))$estimate^2)
kable(eta_shannon_covs[,ncol(eta_shannon_covs)], col.names="R^2 of each variable on shannon index") %>% kable_styling() #output to console the column that has effect sizes we are interested in (shannon entropy as the outcome)
R^2 of each variable on shannon index
ctq_total_score_M54 0.0033158
STAI_prorated_state_pw26 0.0077526
postnatal_stress 0.0107110
sex_binary 0.0015082
deliv_mode 0.0087480
monthly_income_per_member 0.0002920
Sequencing_batch 0.0191355
shannon_entropy 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_all_ncov_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress))

eta_shannon_ncovs <- with(alpha_div_all_ncov_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, shannon_entropy))$estimate^2)
kable(eta_shannon_ncovs[,ncol(eta_shannon_ncovs)], col.names="R^2 of each variable on shannon index") %>% kable_styling() #kable functions give the output nicer formatting
R^2 of each variable on shannon index
ctq_total_score_M54 0.0101649
STAI_prorated_state_pw26 0.0062660
postnatal_stress 0.0113086
shannon_entropy 1.0000000

Faith’s PD

#FAITH
#define models
cov1_mod_mult_faith <- lm(faith_pd~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_faith <- lm(faith_pd~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_covs.doc"))
  faith pd
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 7.12 0.74 0.16 0.14 5.67 – 8.58 -0.11 – 0.42 9.68 <0.001
sex binary [1] -0.27 0.29 -0.15 0.15 -0.83 – 0.30 -0.45 – 0.16 -0.94 0.349
deliv mode [1] -0.15 0.29 -0.08 0.16 -0.73 – 0.42 -0.39 – 0.23 -0.53 0.595
monthly income per member -0.00 0.00 -0.04 0.07 -0.00 – 0.00 -0.17 – 0.10 -0.53 0.596
Sequencing batch [batch2] 0.10 0.31 0.05 0.17 -0.52 – 0.72 -0.28 – 0.39 0.32 0.748
ctq total score M54 -0.01 0.01 -0.08 0.07 -0.04 – 0.01 -0.22 – 0.07 -1.06 0.292
STAI prorated state pw26 -0.01 0.01 -0.05 0.07 -0.04 – 0.02 -0.19 – 0.08 -0.76 0.446
postnatal stress [1] -0.55 0.27 -0.30 0.15 -1.08 – -0.02 -0.58 – -0.01 -2.05 0.041
Observations 186
R2 / R2 adjusted 0.036 / -0.002
knitr::knit_print(tab_model(base_mod_mult_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_nocovs.doc"))
  faith pd
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 6.80 0.57 0.08 0.09 5.69 – 7.92 -0.09 – 0.25 12.02 <0.001
ctq total score M54 -0.01 0.01 -0.09 0.06 -0.03 – 0.01 -0.20 – 0.03 -1.44 0.150
STAI prorated state pw26 -0.01 0.01 -0.06 0.06 -0.04 – 0.01 -0.18 – 0.07 -0.88 0.382
postnatal stress [1] -0.52 0.24 -0.29 0.14 -1.00 – -0.05 -0.56 – -0.03 -2.16 0.032
Observations 205
R2 / R2 adjusted 0.030 / 0.015
#find mean of faith pd by postnatal stress to see direction of effect
alpha_diversity_stress_usable %>% group_by(postnatal_stress) %>% summarise_at(vars(faith_pd), list(name = mean))
## # A tibble: 3 × 2
##   postnatal_stress  name
##   <fct>            <dbl>
## 1 0                 5.79
## 2 1                 5.38
## 3 <NA>              5.78
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_faith_all_cov_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch))

eta_faith_covs <- with(alpha_div_faith_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, faith_pd))$estimate^2) 
kable(eta_faith_covs[,ncol(eta_faith_covs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
R^2 of each variable on Faith’s PD
deliv_mode 0.0014699
sex_binary 0.0051425
monthly_income_per_member 0.0012725
Sequencing_batch 0.0006136
ctq_total_score_M54 0.0050787
STAI_prorated_state_pw26 0.0025229
postnatal_stress 0.0166291
faith_pd 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_faith_all_base_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress)) 

eta_faith_ncovs <- with(alpha_div_faith_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, faith_pd))$estimate^2) 
kable(eta_faith_ncovs[,ncol(eta_faith_ncovs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
R^2 of each variable on Faith’s PD
ctq_total_score_M54 0.0068937
STAI_prorated_state_pw26 0.0028924
postnatal_stress 0.0169593
faith_pd 1.0000000

Observed Features

#OBSERVED FEATURES
#define models
cov1_mod_mult_feat <- lm(observed_features~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_feat <- lm(observed_features~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_covs.doc"))
  observed features
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 86.96 7.28 0.27 0.14 72.59 – 101.34 -0.01 – 0.56 11.94 <0.001
sex binary [1] -5.60 2.66 -0.30 0.14 -10.84 – -0.36 -0.58 – -0.02 -2.11 0.036
deliv mode [1] -2.49 3.06 -0.13 0.16 -8.52 – 3.54 -0.46 – 0.19 -0.82 0.415
monthly income per member -0.00 0.00 -0.14 0.08 -0.01 – 0.00 -0.29 – 0.01 -1.88 0.062
Sequencing batch [batch2] -1.47 3.08 -0.08 0.17 -7.55 – 4.61 -0.41 – 0.25 -0.48 0.634
ctq total score M54 0.12 0.17 0.06 0.08 -0.21 – 0.45 -0.11 – 0.23 0.72 0.473
STAI prorated state pw26 -0.04 0.15 -0.02 0.07 -0.35 – 0.26 -0.17 – 0.12 -0.29 0.770
postnatal stress [1] -4.71 3.15 -0.25 0.17 -10.93 – 1.51 -0.59 – 0.08 -1.49 0.137
Observations 186
R2 / R2 adjusted 0.065 / 0.028
knitr::knit_print(tab_model(base_mod_mult_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_nocovs.doc"))
  observed features
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 82.35 5.68 0.05 0.08 71.16 – 93.54 -0.11 – 0.21 14.51 <0.001
ctq total score M54 -0.00 0.12 -0.00 0.07 -0.24 – 0.23 -0.14 – 0.14 -0.03 0.977
STAI prorated state pw26 -0.05 0.15 -0.02 0.07 -0.33 – 0.24 -0.16 – 0.12 -0.31 0.756
postnatal stress [1] -3.46 2.91 -0.19 0.16 -9.19 – 2.27 -0.50 – 0.12 -1.19 0.235
Observations 205
R2 / R2 adjusted 0.008 / -0.007
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_obs_all_base_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress)) 

eta_features_covs <- with(alpha_div_obs_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, observed_features))$estimate^2)
kable(eta_features_covs[,ncol(eta_features_covs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
R^2 of each variable on Observed Features
ctq_total_score_M54 0.0000037
STAI_prorated_state_pw26 0.0004633
postnatal_stress 0.0070236
observed_features 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
alpha_div_obs_all_cov_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(postnatal_stress) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch))

eta_features_ncovs <- with(alpha_div_obs_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, observed_features))$estimate^2) 
kable(eta_features_ncovs[,ncol(eta_features_ncovs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
R^2 of each variable on Observed Features
deliv_mode 0.0039110
sex_binary 0.0224025
monthly_income_per_member 0.0191220
Sequencing_batch 0.0013219
ctq_total_score_M54 0.0033355
STAI_prorated_state_pw26 0.0004448
postnatal_stress 0.0124610
observed_features 1.0000000

Pielou Evenness

#EVENNESS
#define models
cov1_mod_mult_even <- lm(pielou_evenness~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)
base_mod_mult_even <- lm(pielou_evenness~ctq_total_score_M54+STAI_prorated_state_pw26+postnatal_stress, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_mult_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/evenness_covs.doc"))
  pielou evenness
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 0.52 0.03 0.12 0.13 0.46 – 0.59 -0.14 – 0.39 15.76 <0.001
sex binary [1] 0.02 0.01 0.18 0.14 -0.01 – 0.04 -0.10 – 0.46 1.25 0.214
deliv mode [1] -0.02 0.01 -0.20 0.17 -0.04 – 0.01 -0.52 – 0.13 -1.20 0.231
monthly income per member 0.00 0.00 0.02 0.07 -0.00 – 0.00 -0.12 – 0.17 0.33 0.739
Sequencing batch [batch2] -0.03 0.01 -0.35 0.18 -0.06 – 0.00 -0.69 – 0.00 -1.96 0.052
ctq total score M54 -0.00 0.00 -0.09 0.11 -0.00 – 0.00 -0.31 – 0.12 -0.88 0.381
STAI prorated state pw26 0.00 0.00 0.11 0.07 -0.00 – 0.00 -0.03 – 0.25 1.55 0.124
postnatal stress [1] -0.02 0.01 -0.18 0.16 -0.04 – 0.01 -0.50 – 0.14 -1.10 0.271
Observations 186
R2 / R2 adjusted 0.067 / 0.030
knitr::knit_print(tab_model(base_mod_mult_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/evenness_nocovs.doc"))
  pielou evenness
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 0.53 0.03 0.05 0.08 0.48 – 0.58 -0.11 – 0.22 20.57 <0.001
ctq total score M54 -0.00 0.00 -0.12 0.09 -0.00 – 0.00 -0.30 – 0.05 -1.39 0.167
STAI prorated state pw26 0.00 0.00 0.10 0.07 -0.00 – 0.00 -0.03 – 0.23 1.48 0.141
postnatal stress [1] -0.02 0.01 -0.20 0.15 -0.04 – 0.01 -0.50 – 0.10 -1.33 0.187
Observations 205
R2 / R2 adjusted 0.029 / 0.014
#calculate effect sizes (eta squared/change in r squared) --  no covariates
eta_even_ncovs <- with(alpha_div_obs_all_base_complete, spcor(cbind(ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, pielou_evenness))$estimate^2) 
kable(eta_even_ncovs[,ncol(eta_even_ncovs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
R^2 of each variable on Pielou Evenness
ctq_total_score_M54 0.0144675
STAI_prorated_state_pw26 0.0094482
postnatal_stress 0.0080909
pielou_evenness 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_even_covs <- with(alpha_div_obs_all_cov_complete, spcor(cbind(deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, ctq_total_score_M54, STAI_prorated_state_pw26, postnatal_stress, pielou_evenness))$estimate^2) 
kable(eta_even_covs[,ncol(eta_even_covs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
R^2 of each variable on Pielou Evenness
deliv_mode 0.0085315
sex_binary 0.0081127
monthly_income_per_member 0.0005662
Sequencing_batch 0.0247096
ctq_total_score_M54 0.0081330
STAI_prorated_state_pw26 0.0117345
postnatal_stress 0.0062398
pielou_evenness 1.0000000

Cumulative Adversity

Shannon Index

#SHANNON INDEX
y <- favstats(shannon_entropy~cumulative_adv, data=alpha_diversity_stress_usable) #get group sizes

#define models
base_mod_cumulative0v2_sum <- lm(shannon_entropy~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_sum <- lm(shannon_entropy~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_sum, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/shannon_cumulative_covs.doc"))
  shannon entropy
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 3.41 0.13 0.15 0.17 3.16 – 3.67 -0.18 – 0.48 25.94 <0.001
cumulative 0v1TRUE 0.00 0.11 0.00 0.17 -0.21 – 0.22 -0.33 – 0.34 0.02 0.984
cumulative 0v2TRUE -0.12 0.13 -0.19 0.21 -0.38 – 0.14 -0.60 – 0.22 -0.91 0.364
deliv mode [1] -0.14 0.10 -0.22 0.17 -0.34 – 0.07 -0.55 – 0.11 -1.32 0.188
sex binary [1] 0.07 0.09 0.10 0.15 -0.12 – 0.25 -0.19 – 0.40 0.70 0.487
monthly income per member -0.00 0.00 -0.04 0.07 -0.00 – 0.00 -0.18 – 0.11 -0.47 0.635
Sequencing batch [batch2] -0.20 0.12 -0.31 0.18 -0.43 – 0.03 -0.68 – 0.05 -1.68 0.095
Observations 186
R2 / R2 adjusted 0.039 / 0.007
knitr::knit_print(tab_model(base_mod_cumulative0v2_sum, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/shannon_cumulative_nocovs.doc"))
  shannon entropy
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 3.32 0.07 0.07 0.11 3.19 – 3.45 -0.14 – 0.28 50.23 <0.001
cumulative 0v1TRUE -0.04 0.10 -0.06 0.16 -0.23 – 0.16 -0.37 – 0.25 -0.37 0.709
cumulative 0v2TRUE -0.14 0.12 -0.23 0.19 -0.37 – 0.09 -0.59 – 0.14 -1.21 0.226
Observations 205
R2 / R2 adjusted 0.007 / -0.002
#calculate effect sizes (eta squared/change in r squared) -- covariates
alpha_div_cumulative_complete <-
  alpha_diversity_stress_usable %>% 
  filter(!is.na(cumulative_adv) & !is.na(deliv_mode) & !is.na(sex_binary) &!is.na(monthly_income_per_member) & !is.na(Sequencing_batch)) #first, create dataset of complete cases (required for eta squared calculation)

eta_cumul_shannon_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, shannon_entropy))$estimate^2) 
kable(eta_cumul_shannon_covs[,ncol(eta_cumul_shannon_covs)], col.names="R^2 of each variable on Shannon Index") %>% kable_styling()
R^2 of each variable on Shannon Index
cumulative_0v1 0.0000019
cumulative_0v2 0.0040735
deliv_mode 0.0100456
sex_binary 0.0026858
monthly_income_per_member 0.0012375
Sequencing_batch 0.0191179
shannon_entropy 1.0000000
#calculate effect sizes (eta squared/ change in r squared) -- no covariates
alpha_div_cumulative_nocovs_complete <- 
  alpha_diversity_stress_usable %>% 
  filter(!is.na(cumulative_adv))

eta_cumul_shannon_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, shannon_entropy))$estimate^2)
kable(eta_cumul_shannon_ncovs[,ncol(eta_cumul_shannon_ncovs)], col.names="R^2 of each variable on Shannon Index") %>% kable_styling()
R^2 of each variable on Shannon Index
cumulative_0v1 0.0005724
cumulative_0v2 0.0060810
shannon_entropy 1.0000000

Faith’s PD

#FAITH PD
#define models
base_mod_cumulative0v2_faith <- lm(faith_pd~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_faith <- lm(faith_pd~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_cumulative_covs.doc"))
  faith pd
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 6.53 0.47 0.29 0.20 5.60 – 7.46 -0.10 – 0.68 13.91 <0.001
cumulative 0v1TRUE -0.59 0.35 -0.32 0.19 -1.29 – 0.10 -0.69 – 0.06 -1.68 0.095
cumulative 0v2TRUE -0.68 0.37 -0.37 0.20 -1.40 – 0.05 -0.76 – 0.03 -1.84 0.067
deliv mode [1] -0.14 0.29 -0.08 0.16 -0.72 – 0.43 -0.39 – 0.23 -0.50 0.620
sex binary [1] -0.28 0.28 -0.15 0.15 -0.83 – 0.27 -0.45 – 0.15 -1.00 0.321
monthly income per member -0.00 0.00 -0.05 0.07 -0.00 – 0.00 -0.19 – 0.09 -0.74 0.463
Sequencing batch [batch2] 0.04 0.32 0.02 0.17 -0.60 – 0.67 -0.32 – 0.36 0.12 0.905
Observations 186
R2 / R2 adjusted 0.035 / 0.003
knitr::knit_print(tab_model(base_mod_cumulative0v2_faith, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/faith_cumulative_nocovs.doc"))
  faith pd
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 6.13 0.25 0.20 0.14 5.65 – 6.62 -0.07 – 0.47 24.85 <0.001
cumulative 0v1TRUE -0.55 0.29 -0.31 0.16 -1.12 – 0.03 -0.63 – 0.02 -1.86 0.065
cumulative 0v2TRUE -0.67 0.32 -0.38 0.18 -1.30 – -0.04 -0.73 – -0.02 -2.09 0.038
Observations 205
R2 / R2 adjusted 0.027 / 0.018
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_faith_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, faith_pd))$estimate^2) 
kable(eta_cumul_faith_covs[,ncol(eta_cumul_faith_covs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
R^2 of each variable on Faith’s PD
cumulative_0v1 0.0156999
cumulative_0v2 0.0149369
deliv_mode 0.0012919
sex_binary 0.0056130
monthly_income_per_member 0.0027463
Sequencing_batch 0.0000874
faith_pd 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_faith_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, faith_pd))$estimate^2)
kable(eta_cumul_faith_ncovs[,ncol(eta_cumul_faith_ncovs)], col.names="R^2 of each variable on Faith's PD") %>% kable_styling()
R^2 of each variable on Faith’s PD
cumulative_0v1 0.0152315
cumulative_0v2 0.0169529
faith_pd 1.0000000

Observed Features

#OBSERVED FEATURES
#define models
base_mod_cumulative_feat <- lm(observed_features~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative_feat <- lm(observed_features~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(cov1_mod_cumulative_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_cumulative_covs.doc"))
  observed features
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 87.35 4.19 0.15 0.17 79.08 – 95.62 -0.19 – 0.48 20.85 <0.001
cumulative 0v1TRUE 2.47 3.04 0.13 0.16 -3.52 – 8.46 -0.19 – 0.46 0.81 0.416
cumulative 0v2TRUE 0.54 4.06 0.03 0.22 -7.47 – 8.55 -0.40 – 0.46 0.13 0.894
deliv mode [1] -3.15 3.08 -0.17 0.17 -9.24 – 2.94 -0.50 – 0.16 -1.02 0.308
sex binary [1] -5.40 2.71 -0.29 0.15 -10.74 – -0.06 -0.58 – -0.00 -1.99 0.048
monthly income per member -0.00 0.00 -0.14 0.08 -0.01 – 0.00 -0.28 – 0.01 -1.81 0.072
Sequencing batch [batch2] -0.83 3.11 -0.04 0.17 -6.96 – 5.31 -0.37 – 0.29 -0.27 0.791
Observations 186
R2 / R2 adjusted 0.053 / 0.021
knitr::knit_print(tab_model(base_mod_cumulative_feat, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/observed_cumulative_nocovs.doc"))
  observed features
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 79.00 1.83 -0.05 0.10 75.39 – 82.61 -0.24 – 0.15 43.11 <0.001
cumulative 0v1TRUE 2.17 2.77 0.12 0.15 -3.30 – 7.64 -0.18 – 0.42 0.78 0.435
cumulative 0v2TRUE 0.11 3.66 0.01 0.20 -7.11 – 7.33 -0.39 – 0.40 0.03 0.976
Observations 205
R2 / R2 adjusted 0.003 / -0.007
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_feat_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, observed_features))$estimate^2) 
kable(eta_cumul_feat_covs[,ncol(eta_cumul_feat_covs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
R^2 of each variable on Observed Features
cumulative_0v1 0.0028212
cumulative_0v2 0.0000986
deliv_mode 0.0061449
sex_binary 0.0209539
monthly_income_per_member 0.0182633
Sequencing_batch 0.0004046
observed_features 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_feat_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, observed_features))$estimate^2)
kable(eta_cumul_feat_ncovs[,ncol(eta_cumul_feat_ncovs)], col.names="R^2 of each variable on Observed Features") %>% kable_styling()
R^2 of each variable on Observed Features
cumulative_0v1 0.0022817
cumulative_0v2 0.0000043
observed_features 1.0000000

Pielou Evenness

#PIELOU EVENNESS
#define models
base_mod_cumulative0v2_even <- lm(pielou_evenness~cumulative_0v1 + cumulative_0v2, data = alpha_diversity_stress_usable)
cov1_mod_cumulative0v2_even <- lm(pielou_evenness~cumulative_0v1 + cumulative_0v2+deliv_mode+sex_binary+monthly_income_per_member+Sequencing_batch, data = alpha_diversity_stress_usable)

#run models (HC3 robust standard errors, showing standardized SE and coefficients, and saving to file)
knitr::knit_print(tab_model(base_mod_cumulative0v2_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/even_cumulative_nocovs.doc"))
  pielou evenness
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 0.53 0.01 0.10 0.10 0.51 – 0.54 -0.11 – 0.30 59.66 <0.001
cumulative 0v1TRUE -0.01 0.01 -0.11 0.16 -0.04 – 0.02 -0.42 – 0.20 -0.69 0.491
cumulative 0v2TRUE -0.02 0.02 -0.25 0.19 -0.05 – 0.01 -0.62 – 0.12 -1.35 0.179
Observations 205
R2 / R2 adjusted 0.009 / -0.001
knitr::knit_print(tab_model(cov1_mod_cumulative0v2_even, robust=TRUE, show.se=TRUE, show.std=TRUE, show.stat=TRUE, file="../../manuscripts/fran_adversity_microbiome/tables/alpha_diversity/even_cumulative_covs.doc"))
  pielou evenness
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 0.53 0.02 0.14 0.16 0.50 – 0.56 -0.18 – 0.45 32.38 <0.001
cumulative 0v1TRUE -0.00 0.01 -0.05 0.17 -0.03 – 0.02 -0.38 – 0.29 -0.27 0.789
cumulative 0v2TRUE -0.02 0.02 -0.22 0.20 -0.05 – 0.02 -0.62 – 0.18 -1.07 0.286
deliv mode [1] -0.02 0.01 -0.21 0.16 -0.04 – 0.01 -0.53 – 0.12 -1.25 0.211
sex binary [1] 0.02 0.01 0.21 0.15 -0.01 – 0.04 -0.08 – 0.50 1.40 0.163
monthly income per member 0.00 0.00 0.00 0.07 -0.00 – 0.00 -0.14 – 0.14 0.02 0.985
Sequencing batch [batch2] -0.03 0.02 -0.36 0.19 -0.06 – 0.00 -0.73 – 0.01 -1.95 0.053
Observations 186
R2 / R2 adjusted 0.051 / 0.019
#calculate effect sizes (eta squared/change in r squared) -- covariates
eta_cumul_even_covs <- with(alpha_div_cumulative_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, deliv_mode, sex_binary, monthly_income_per_member, Sequencing_batch, pielou_evenness))$estimate^2) 
kable(eta_cumul_even_covs[,ncol(eta_cumul_even_covs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
R^2 of each variable on Pielou Evenness
cumulative_0v1 0.0003256
cumulative_0v2 0.0055022
deliv_mode 0.0089445
sex_binary 0.0106898
monthly_income_per_member 0.0000018
Sequencing_batch 0.0263969
pielou_evenness 1.0000000
#calculate effect sizes (eta squared/change in r squared) -- no covariates
eta_cumul_even_ncovs <- with(alpha_div_cumulative_nocovs_complete, spcor(cbind(cumulative_0v1, cumulative_0v2, pielou_evenness))$estimate^2)
kable(eta_cumul_even_ncovs[,ncol(eta_cumul_even_ncovs)], col.names="R^2 of each variable on Pielou Evenness") %>% kable_styling()
R^2 of each variable on Pielou Evenness
cumulative_0v1 0.0019474
cumulative_0v2 0.0074880
pielou_evenness 1.0000000

Alpha Diversity Visualizations

Scatterplots & Boxplots: Alpha Diversity by Adversity

#postnatal adversity
d_postnatal <- alpha_diversity_stress_usable %>% dplyr::select(subjid, postnatal_stress, shannon_entropy, faith_pd, pielou_evenness, observed_features) #grab only needed vars
# Than transform the data to long format as stated already in the comments using reshape function melt
d_postnatal <- d_postnatal %>% 
  mutate(
    postnatal_stress_s = case_when(
      postnatal_stress==1 ~ "yes",
      postnatal_stress==0 ~ "no"
    ) 
  ) %>% 
  dplyr::select(
      -postnatal_stress
    )

d_postnatal_long <- melt(d_postnatal)
## Using subjid, postnatal_stress_s as id variables
d_postnatal_long_complete <- d_postnatal_long %>% filter(across(.cols=everything(), ~!is.na(.x))) #create complete dataset

#Boxplots
ggplot(d_postnatal_long_complete, aes(y = value, x = postnatal_stress_s, colour = postnatal_stress_s)) + 
  geom_jitter() +
  geom_boxplot(alpha=0.3, outlier.shape=NA) +
  xlab("Postnatal Stress Exposure") + 
  ylab("Value") +
  facet_wrap( ~ variable, scales = "free", labeller= as_labeller(c('shannon_entropy' = "Shannon's Index", 'faith_pd' = "Faith's PD", 'pielou_evenness' = "Pielou Evenness", 'observed_features' = "Observed Features"))) + 
  theme_bw() +
  stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=10) +
  scale_color_discrete("Postnatal Stress Exposure", labels = c("no (N = 233)", "yes (N= 76)")) 

#Prenatal Adversity
grid.arrange(ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, shannon_entropy))+geom_point(size=1)+geom_smooth(method="lm") + labs(x="Prenatal Adversity Score", y= "Shannon Index"),
             ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, faith_pd))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Faith's PD"),
             ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, pielou_evenness))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Pielou Evenness"),
             ggplot(alpha_diversity_stress_usable, aes(STAI_prorated_state_pw26, observed_features))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Observed Features"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#Preconception adversity
grid.arrange(ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, shannon_entropy))+geom_point(size=1)+geom_smooth(method="lm") + labs(x="Preconception Adversity Score", y= "Shannon Index"),
             ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, faith_pd))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Prenatal Adversity Score", y="Faith's PD"),
             ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, pielou_evenness))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Preconception Adversity Score", y="Pielou Evenness"),
             ggplot(alpha_diversity_stress_usable, aes(ctq_total_score_M54, observed_features))+geom_point(size=1)+geom_smooth(method="lm")+ labs(x="Preconception Adversity Score", y="Observed Features"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

#cumulative adversity
d_cumulative <- alpha_diversity_stress_usable %>% dplyr::select(subjid, cumulative_adv, shannon_entropy, faith_pd, pielou_evenness, observed_features) 
d_cumulative_long <- melt(d_cumulative)
## Using subjid, cumulative_adv as id variables
d_cumulative_long_complete <- d_cumulative_long %>% filter(across(.cols=everything(), ~!is.na(.x))) #create complete dataset

#Boxplots
ggplot(d_cumulative_long_complete, aes(y = value, x = cumulative_adv, colour = cumulative_adv)) + 
  geom_jitter() +
  geom_boxplot(alpha=0.3, outlier.shape=NA) +
  xlab("Cumulative Adversity Exposure") + 
  ylab("Value") +
  facet_wrap( ~ variable, scales = "free", labeller= as_labeller(c('shannon_entropy' = "Shannon Index", 'faith_pd' = "Faith's PD", 'pielou_evenness' = "Pielou Evenness", 'observed_features' = "Observed Features"))) + 
  theme_bw() +
  stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=10) +
  scale_color_discrete("Cumulative Adversity Exposure", labels = c("0 timepoints", "1 timepoint", "2+ timepoints")) 

### Figure 1: Faith’s PD is Lower Among Children with Higher Postnatal Adversity

#create group means dataset for a quick look at magnitude of effect
faith_means <- alpha_diversity_stress_usable %>% 
  filter(!is.na(postnatal_stress)) %>% 
        group_by(postnatal_stress) %>% 
        summarise(faith_pd = mean(faith_pd))

#add postnatal stress string variable to make plotting easier
alpha_diversity_stress_usable <-
  alpha_diversity_stress_usable %>% 
  mutate(
    postnatal_stress_str = case_when(
      postnatal_stress == 0 ~ "no",
      postnatal_stress == 1 ~ "yes"
    )
  )

#create residual for faith pd after removing variance associated with covariates
faith_resid <- lm(faith_pd ~sex_binary+deliv_mode+monthly_income_per_member+Sequencing_batch+ctq_total_score_M54+STAI_prorated_state_pw26, data = alpha_diversity_stress_usable)

alpha_diversity_stress_usable_faith <- 
  alpha_diversity_stress_usable %>% 
  filter(!is.na(sex_binary) & !is.na(deliv_mode) &!is.na(monthly_income_per_member)&!is.na(Sequencing_batch) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) %>% 
  mutate(
    faith_resid = resid(faith_resid)
  ) 

#determine Ns for the graph labels
alpha_diversity_stress_usable_faith %>% dplyr::count(postnatal_stress==1)
##   postnatal_stress == 1   n
## 1                 FALSE 138
## 2                  TRUE  48
## 3                    NA  63
#boxplot with faith by postnatal adversity (raw values)
ggplot(alpha_diversity_stress_usable %>% filter(!is.na(postnatal_stress_str)), aes(y = faith_pd, x = postnatal_stress_str, color = postnatal_stress_str)) +
geom_point(position=position_jitter(w=0.3, h=0)) +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Postnatal Adversity Exposure") + 
ylab("Faith's Phylogenetic Diversity") +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=20) +
  scale_color_discrete("Postnatal Adversity Exposure", labels = c("no (N = 138)", "yes (N = 48)"))

#boxplot with faith by postnatal adversity (partial correlations)
ggplot(alpha_diversity_stress_usable_faith %>% filter(!is.na(postnatal_stress_str)), aes(y = faith_resid, x = postnatal_stress_str, color = postnatal_stress_str)) +
geom_point(position=position_jitter(w=0.3, h=0)) +
geom_boxplot(alpha=0.3, outlier.shape=NA) +
xlab("Postnatal Adversity Exposure") + 
  scale_x_discrete("Postnatal Adversity Exposure", labels = c("no" = "No (N=138)", "yes" = "Yes (N=48)")
  ) +
ylab("Faith's Phylogenetic Diversity (residuals)") +
stat_summary(fun=mean, colour='royalblue3', geom='point', shape='-', size=20) +
theme_minimal() +
theme(#legend.position="none",#hide grid lines
  panel.grid.major.x = element_blank(),
  panel.grid.minor.x = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.y = element_blank(),
)

#save graph with Faith PD residuals for inclusion in manuscript
#ggsave("../../manuscripts/fran_adversity_microbiome/figures/faith_postnat_resid.png", height=4, width=8)

Beta Diversity

Qiime forum with explanation of PERMANOVA and permdisp tests: https://forum.qiime2.org/t/understanding-beta-group-significance-permanova-results/12648/4

  • Permanova asks if there is a difference in either within or between group distance for any of the groups.
  • Permdisp tests the hypothesis that there is a significant difference in within group variance.
  • So, if you see a large signal in permanova and a small one in permdisp, that suggests that the difference is driven by differences between communities compared to differences within one of the communities.

Info about effect size in adonis (R^2): http://qiime.org/tutorials/category_comparison.html
### Covariate Selection Potential covariates: sex_binary, delivery mode, breastfeeding vars, ethnicity, antibiotics, probiotics, sequencing batch

Child Sex

physeq_M24_sex <- subset_samples(physeq_M24, sex_binary != "NA") #create a dataset that is complete on sex; necessary for distance matrix calculations to work

#get distance matrices (weighted unifrac, unweighted unifrac, bray-curtis, jaccard)
set.seed(main.seed) #need to run this right before all Unifrac distance matrices calculations 
M24_wunifrac_sex <- phyloseq::distance(physeq_M24_sex, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_sex <- phyloseq::distance(physeq_M24_sex, method = "unifrac", type = "samples")
M24_jaccard_sex <- phyloseq::distance(physeq_M24_sex, method = "jaccard", type = "samples")
M24_bc_sex <- phyloseq::distance(physeq_M24_sex, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_sex <- data.frame(sample_data(physeq_M24_sex)) #get data frame of metadata

#perform PERMANOVAs with each distance matrix
set.seed(main.seed) #need to run this before Unifrac distance matrices calculations 
test <- adonis(M24_wunifrac_sex~sex_binary, data=metadata_physeq_M24_sex)#ns
set.seed(main.seed)
adonis(M24_unifrac_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
## 
## Call:
## adonis(formula = M24_unifrac_sex ~ sex_binary, data = metadata_physeq_M24_sex) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## sex_binary   1     0.184 0.18370  1.2401 0.00267   0.16
## Residuals  463    68.585 0.14813         0.99733       
## Total      464    68.769                 1.00000
adonis(M24_jaccard_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
## 
## Call:
## adonis(formula = M24_jaccard_sex ~ sex_binary, data = metadata_physeq_M24_sex) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)
## sex_binary   1     0.301 0.30125 0.88172 0.0019  0.681
## Residuals  463   158.191 0.34167         0.9981       
## Total      464   158.492                 1.0000
adonis(M24_bc_sex~sex_binary, data=metadata_physeq_M24_sex) #ns
## 
## Call:
## adonis(formula = M24_bc_sex ~ sex_binary, data = metadata_physeq_M24_sex) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## sex_binary   1     0.235 0.23465 0.89675 0.00193  0.559
## Residuals  463   121.153 0.26167         0.99807       
## Total      464   121.388                 1.00000

Antibiotic Use

#ANTIBIOTICS
physeq_M24_antibio <- subset_samples(physeq_M24, antibio_M24 != "NA")

#calculate distance matrices
set.seed(main.seed) 
M24_wunifrac_antibio <- phyloseq::distance(physeq_M24_antibio, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_antibio <- phyloseq::distance(physeq_M24_antibio, method = "unifrac", type = "samples")
M24_jaccard_antibio <- phyloseq::distance(physeq_M24_antibio, method = "jaccard", type = "samples")
M24_bc_antibio <- phyloseq::distance(physeq_M24_antibio, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_antibio <- data.frame(sample_data(physeq_M24_antibio))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #sig
## 
## Call:
## adonis(formula = M24_wunifrac_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)   
## antibio_M24   1     0.224 0.224326  3.3052 0.00709  0.004 **
## Residuals   463    31.424 0.067871         0.99291          
## Total       464    31.649                  1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_unifrac_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
## 
## Call:
## adonis(formula = M24_unifrac_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## antibio_M24   1     0.135 0.13542 0.91353 0.00197   0.54
## Residuals   463    68.634 0.14824         0.99803       
## Total       464    68.769                 1.00000
adonis(M24_jaccard_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
## 
## Call:
## adonis(formula = M24_jaccard_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## antibio_M24   1     0.475 0.47496  1.3917 0.003   0.06 .
## Residuals   463   158.018 0.34129         0.997         
## Total       464   158.492                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_antibio~antibio_M24, data=metadata_physeq_M24_antibio) #ns
## 
## Call:
## adonis(formula = M24_bc_antibio ~ antibio_M24, data = metadata_physeq_M24_antibio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## antibio_M24   1      0.43 0.42988  1.6455 0.00354  0.053 .
## Residuals   463    120.96 0.26125         0.99646         
## Total       464    121.39                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Probiotic Use

#PROBIOTICS
physeq_M24_probio <- subset_samples(physeq_M24, probiotics_M24 != "NA")

#calculate distance matrices
set.seed(main.seed) 
M24_wunifrac_probio <- phyloseq::distance(physeq_M24_probio, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_probio <- phyloseq::distance(physeq_M24_probio, method = "unifrac", type = "samples")
M24_jaccard_probio <- phyloseq::distance(physeq_M24_probio, method = "jaccard", type = "samples")
M24_bc_probio <- phyloseq::distance(physeq_M24_probio, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_probio <- data.frame(sample_data(physeq_M24_probio))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
## 
## Call:
## adonis(formula = M24_wunifrac_probio ~ probiotics_M24, data = metadata_physeq_M24_probio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## probiotics_M24   1     0.118 0.11801  1.7393 0.00383  0.117
## Residuals      452    30.668 0.06785         0.99617       
## Total          453    30.786                 1.00000
adonis(M24_unifrac_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
## 
## Call:
## adonis(formula = M24_unifrac_probio ~ probiotics_M24, data = metadata_physeq_M24_probio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## probiotics_M24   1     0.115 0.11527  0.7744 0.00171  0.778
## Residuals      452    67.281 0.14885         0.99829       
## Total          453    67.396                 1.00000
adonis(M24_jaccard_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
## 
## Call:
## adonis(formula = M24_jaccard_probio ~ probiotics_M24, data = metadata_physeq_M24_probio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## probiotics_M24   1     0.366 0.36588  1.0712 0.00236  0.322
## Residuals      452   154.392 0.34158         0.99764       
## Total          453   154.758                 1.00000
adonis(M24_bc_probio~probiotics_M24, data=metadata_physeq_M24_probio) #ns
## 
## Call:
## adonis(formula = M24_bc_probio ~ probiotics_M24, data = metadata_physeq_M24_probio) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## probiotics_M24   1     0.302 0.30243  1.1556 0.00255  0.256
## Residuals      452   118.288 0.26170         0.99745       
## Total          453   118.591                 1.00000

Delivery Mode

#DELIVERY MODE 
physeq_M24_dm <- subset_samples(physeq_M24, deliv_mode != "NA")

#calculate distance matrices
set.seed(main.seed) 
M24_wunifrac_dm <- phyloseq::distance(physeq_M24_dm, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_dm <- phyloseq::distance(physeq_M24_dm, method = "unifrac", type = "samples")
M24_jaccard_dm <- phyloseq::distance(physeq_M24_dm, method = "jaccard", type = "samples")
M24_bc_dm <- phyloseq::distance(physeq_M24_dm, method = "bray", type = "samples")
#get dataframe of metadata
metadata_physeq_M24_dm <- data.frame(sample_data(physeq_M24_dm))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_dm~deliv_mode, data=metadata_physeq_M24_dm)#ns
## 
## Call:
## adonis(formula = M24_wunifrac_dm ~ deliv_mode, data = metadata_physeq_M24_dm) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## deliv_mode   1     0.047 0.046999 0.68859 0.00149  0.671
## Residuals  463    31.602 0.068254         0.99851       
## Total      464    31.649                  1.00000
set.seed(main.seed)
adonis(M24_unifrac_dm~deliv_mode, data=metadata_physeq_M24_dm) #significant
## 
## Call:
## adonis(formula = M24_unifrac_dm ~ deliv_mode, data = metadata_physeq_M24_dm) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## deliv_mode   1     0.318 0.31815  2.1519 0.00463   0.01 **
## Residuals  463    68.451 0.14784         0.99537          
## Total      464    68.769                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_dm~deliv_mode, data=metadata_physeq_M24_dm) #ns
## 
## Call:
## adonis(formula = M24_jaccard_dm ~ deliv_mode, data = metadata_physeq_M24_dm) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## deliv_mode   1     0.336 0.33620 0.98422 0.00212  0.472
## Residuals  463   158.156 0.34159         0.99788       
## Total      464   158.492                 1.00000
adonis(M24_bc_dm~deliv_mode, data=metadata_physeq_M24_dm) #ns
## 
## Call:
## adonis(formula = M24_bc_dm ~ deliv_mode, data = metadata_physeq_M24_dm) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##             Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## deliv_mode   1     0.241 0.24074 0.92007 0.00198   0.51
## Residuals  463   121.147 0.26166         0.99802       
## Total      464   121.388                 1.00000

Child Ethnicity

#ETHNICITY
physeq_M24_eth <- subset_samples(physeq_M24, mother_ethnicity != "NA")

#calculate distance matrices
set.seed(main.seed)  
M24_wunifrac_eth <- phyloseq::distance(physeq_M24_eth, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_eth <- phyloseq::distance(physeq_M24_eth, method = "unifrac", type = "samples")
M24_jaccard_eth <- phyloseq::distance(physeq_M24_eth, method = "jaccard", type = "samples")
M24_bc_eth <- phyloseq::distance(physeq_M24_eth, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_eth <- data.frame(sample_data(physeq_M24_eth))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #ns
## 
## Call:
## adonis(formula = M24_wunifrac_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs  MeanSqs F.Model     R2 Pr(>F)
## mother_ethnicity   2     0.142 0.071237  1.0446 0.0045  0.395
## Residuals        462    31.506 0.068195         0.9955       
## Total            464    31.649                  1.0000
adonis(M24_unifrac_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #significant
## 
## Call:
## adonis(formula = M24_unifrac_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## mother_ethnicity   2     0.445 0.22238  1.5037 0.00647  0.024 *
## Residuals        462    68.324 0.14789         0.99353         
## Total            464    68.769                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #ns
## 
## Call:
## adonis(formula = M24_jaccard_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## mother_ethnicity   2     0.914 0.45698  1.3398 0.00577  0.053 .
## Residuals        462   157.579 0.34108         0.99423         
## Total            464   158.492                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_eth~mother_ethnicity, data=metadata_physeq_M24_eth) #significant
## 
## Call:
## adonis(formula = M24_bc_eth ~ mother_ethnicity, data = metadata_physeq_M24_eth) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## mother_ethnicity   2      0.78 0.39005  1.4941 0.00643  0.044 *
## Residuals        462    120.61 0.26106         0.99357         
## Total            464    121.39                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Sequencing Batch

#SEQUENCING BATCH
physeq_M24_seqba <- subset_samples(physeq_M24, Sequencing_batch != "NA")

#calculate distance matrices
set.seed(main.seed) 
M24_wunifrac_seqba <- phyloseq::distance(physeq_M24_seqba, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_seqba <- phyloseq::distance(physeq_M24_seqba, method = "unifrac", type = "samples")
M24_jaccard_seqba <- phyloseq::distance(physeq_M24_seqba, method = "jaccard", type = "samples")
M24_bc_seqba <- phyloseq::distance(physeq_M24_seqba, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_seqba <- data.frame(sample_data(physeq_M24_seqba))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #ns
## 
## Call:
## adonis(formula = M24_wunifrac_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## Sequencing_batch   1     0.054 0.053759 0.78599 0.00169  0.565
## Residuals        465    31.805 0.068397         0.99831       
## Total            466    31.858                  1.00000
adonis(M24_unifrac_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
## 
## Call:
## adonis(formula = M24_unifrac_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Sequencing_batch   1     1.037 1.03698   7.087 0.01501  0.001 ***
## Residuals        465    68.039 0.14632         0.98499           
## Total            466    69.076                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
## 
## Call:
## adonis(formula = M24_jaccard_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## Sequencing_batch   1     0.863 0.86319  2.5336 0.00542  0.002 **
## Residuals        465   158.426 0.34070         0.99458          
## Total            466   159.289                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_seqba~Sequencing_batch, data=metadata_physeq_M24_seqba) #sig
## 
## Call:
## adonis(formula = M24_bc_seqba ~ Sequencing_batch, data = metadata_physeq_M24_seqba) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Sequencing_batch   1     0.836 0.83610  3.2081 0.00685  0.001 ***
## Residuals        465   121.190 0.26062         0.99315           
## Total            466   122.026                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Duration of Any and Exclusive Breastfeeding

#DURATION OF EXCLUSIVE BREASTFEEDING
physeq_M24_ebf <- subset_samples(physeq_M24, only_bf_months != "NA")

#calculate distance matrices
set.seed(main.seed) 
M24_wunifrac_ebf <- phyloseq::distance(physeq_M24_ebf, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_ebf <- phyloseq::distance(physeq_M24_ebf, method = "unifrac", type = "samples")
M24_jaccard_ebf <- phyloseq::distance(physeq_M24_ebf, method = "jaccard", type = "samples")
M24_bc_ebf <- phyloseq::distance(physeq_M24_ebf, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_ebf <- data.frame(sample_data(physeq_M24_ebf))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_ebf~only_bf_months, data=metadata_physeq_M24_ebf)#significant
## 
## Call:
## adonis(formula = M24_wunifrac_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs  MeanSqs F.Model     R2 Pr(>F)   
## only_bf_months   3     0.414 0.137992   2.057 0.0135  0.009 **
## Residuals      451    30.255 0.067084         0.9865          
## Total          454    30.669                  1.0000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_unifrac_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
## 
## Call:
## adonis(formula = M24_unifrac_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## only_bf_months   3     0.610 0.20323  1.3748 0.00906  0.038 *
## Residuals      451    66.669 0.14783         0.99094         
## Total          454    67.279                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_jaccard_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
## 
## Call:
## adonis(formula = M24_jaccard_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## only_bf_months   3     1.751 0.58356  1.7238 0.01134  0.002 **
## Residuals      451   152.680 0.33854         0.98866          
## Total          454   154.431                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_ebf~only_bf_months, data=metadata_physeq_M24_ebf) #significant
## 
## Call:
## adonis(formula = M24_bc_ebf ~ only_bf_months, data = metadata_physeq_M24_ebf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## only_bf_months   3     1.554 0.51811  2.0073 0.01318  0.002 **
## Residuals      451   116.407 0.25811         0.98682          
## Total          454   117.961                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#DURATION OF ANY BREASTFEEDING
physeq_M24_abf <- subset_samples(physeq_M24, any_bf_months != "NA")

#calculate distance matrices
set.seed(main.seed)  
M24_wunifrac_abf <- phyloseq::distance(physeq_M24_abf, method = "wunifrac", type = "samples")
set.seed(main.seed) 
M24_unifrac_abf <- phyloseq::distance(physeq_M24_abf, method = "unifrac", type = "samples")
M24_jaccard_abf <- phyloseq::distance(physeq_M24_abf, method = "jaccard", type = "samples")
M24_bc_abf <- phyloseq::distance(physeq_M24_abf, method = "bray", type = "samples")

#get dataframe of metadata
metadata_physeq_M24_abf <- data.frame(sample_data(physeq_M24_abf))

#perform PERMANOVAs with each distance matrix
set.seed(main.seed)
adonis(M24_wunifrac_abf~any_bf_months, data=metadata_physeq_M24_abf) #sig
## 
## Call:
## adonis(formula = M24_wunifrac_abf ~ any_bf_months, data = metadata_physeq_M24_abf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)  
## any_bf_months   4    0.4316 0.107899  1.5972 0.01406  0.048 *
## Residuals     448   30.2657 0.067557         0.98594         
## Total         452   30.6973                  1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(main.seed)
adonis(M24_unifrac_abf~any_bf_months, data=metadata_physeq_M24_abf) 
## 
## Call:
## adonis(formula = M24_unifrac_abf ~ any_bf_months, data = metadata_physeq_M24_abf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## any_bf_months   4     0.633 0.15833  1.0681 0.00945  0.301
## Residuals     448    66.409 0.14823         0.99055       
## Total         452    67.042                 1.00000
adonis(M24_jaccard_abf~any_bf_months, data=metadata_physeq_M24_abf) 
## 
## Call:
## adonis(formula = M24_jaccard_abf ~ any_bf_months, data = metadata_physeq_M24_abf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)   
## any_bf_months   4     1.971 0.49272  1.4525 0.0128  0.003 **
## Residuals     448   151.970 0.33922         0.9872          
## Total         452   153.940                 1.0000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(M24_bc_abf~any_bf_months, data=metadata_physeq_M24_abf) 
## 
## Call:
## adonis(formula = M24_bc_abf ~ any_bf_months, data = metadata_physeq_M24_abf) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## any_bf_months   4     1.789 0.44731  1.7296 0.01521  0.005 **
## Residuals     448   115.859 0.25861         0.98479          
## Total         452   117.649                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Determine if breastfeeding variables collinear; if so, only use one
alpha_diversity_stress_usable %$%
  ctable(only_bf_months, any_bf_months,
    prop = "r", chisq = TRUE, headings = FALSE
  ) %>%
  print(
    method = "render",
    style = "rmarkdown",
    footnote = NA
  )
any_bf_months
only_bf_months <1M >12M 1M_to_3M 3M_to_6M 6M_to_12M <NA> Total
<1M 82 ( 24.6% ) 39 ( 11.7% ) 72 ( 21.6% ) 70 ( 21.0% ) 65 ( 19.5% ) 6 ( 1.8% ) 334 ( 100.0% )
1M_to_3M 0 ( 0.0% ) 3 ( 21.4% ) 0 ( 0.0% ) 6 ( 42.9% ) 4 ( 28.6% ) 1 ( 7.1% ) 14 ( 100.0% )
3M_to_6M 0 ( 0.0% ) 26 ( 59.1% ) 0 ( 0.0% ) 4 ( 9.1% ) 14 ( 31.8% ) 0 ( 0.0% ) 44 ( 100.0% )
6M_to_12M 0 ( 0.0% ) 36 ( 75.0% ) 0 ( 0.0% ) 0 ( 0.0% ) 12 ( 25.0% ) 0 ( 0.0% ) 48 ( 100.0% )
<NA> 0 ( 0.0% ) 0 ( 0.0% ) 0 ( 0.0% ) 0 ( 0.0% ) 5 ( 50.0% ) 5 ( 50.0% ) 10 ( 100.0% )
Total 82 ( 18.2% ) 104 ( 23.1% ) 72 ( 16.0% ) 80 ( 17.8% ) 100 ( 22.2% ) 12 ( 2.7% ) 450 ( 100.0% )
 Χ2 = 164.9789   df = 12   p = .0000

Summary

  • Significant covariates: ethnicity, delivery mode, duration of exclusive breastfeeding, antibiotics, sequencing batch
  • Even though both duration of exclusive and partial breastfeeding were significant for the sake of model parsimony only used duration of exclsuive breastfeeding because the two breastfeeing variables are associated with each other and duration of exclusive was sig for more beta diversity metrics

All Adversity Exposures

Including Covariates

#INCLUDING COVARIATES
#create a phyloseq object with complete cases on adversity variables and covaraites
physeq_M24_all <- subset_samples(physeq_M24, !is.na(postnatal_stress) & !is.na(maltreated_mod_M54) & !is.na(stai_state_above_clin_cutoff_pw26) & !is.na(only_bf_months) & !is.na(deliv_mode) & !is.na(mother_ethnicity) & !is.na(antibio_M24) & !is.na(Sequencing_batch)) #N=204 for this analysis

#calculate distance matrices using this complete cases dataset
set.seed(main.seed)
M24_wunifrac_all <- phyloseq::distance(physeq_M24_all, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_all <- phyloseq::distance(physeq_M24_all, method = "unifrac", type = "samples")
M24_bc_all <- phyloseq::distance(physeq_M24_all, method = "bray", type = "samples")
M24_jaccard_all <- phyloseq::distance(physeq_M24_all, method = "jaccard", type = "samples")

#get metadata
metadata_physeq_M24_all <- data.frame(sample_data(physeq_M24_all)) #get data frame of metadata

#test assumption of betadispersion
disp_all_maltreated <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$maltreated_mod_M54)
permutest(disp_all_maltreated) #yes, apparently they do have homogeneity of dispersion
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups      1 0.00716 0.0071600 0.9545    999  0.338
## Residuals 202 1.51526 0.0075013
disp_all_stai <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$stai_state_above_clin_cutoff_pw26)
permutest(disp_all_stai) #yes, apparently they do have homogeneity of dispersion
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups      1 0.00027 0.0002712 0.0357    999  0.842
## Residuals 202 1.53481 0.0075981
disp_all_leq <- betadisper(M24_wunifrac_all, metadata_physeq_M24_all$postnatal_stress)
permutest(disp_all_leq) #yes, apparently they do have homogeneity of dispersion
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups      1 0.00743 0.0074292 0.9784    999  0.313
## Residuals 202 1.53378 0.0075930
#permanovas
#weighted unifrac
set.seed(main.seed)
wunifrac_all_covs <- adonis(M24_wunifrac_all~ stai_state_above_clin_cutoff_pw26 + postnatal_stress+maltreated_mod_M54+ mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all) 
wunifrac_all_covs #print output
## 
## Call:
## adonis(formula = M24_wunifrac_all ~ stai_state_above_clin_cutoff_pw26 +      postnatal_stress + maltreated_mod_M54 + mother_ethnicity +      deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch,      data = metadata_physeq_M24_all) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## stai_state_above_clin_cutoff_pw26   1    0.0296 0.029638 0.45265 0.00221  0.881
## postnatal_stress                    1    0.0521 0.052147 0.79642 0.00389  0.561
## maltreated_mod_M54                  1    0.0543 0.054297 0.82925 0.00405  0.577
## mother_ethnicity                    2    0.2222 0.111085 1.69655 0.01657  0.066
## deliv_mode                          1    0.0402 0.040162 0.61337 0.00300  0.721
## only_bf_months                      3    0.1802 0.060082 0.91760 0.01344  0.543
## antibio_M24                         1    0.1886 0.188649 2.88115 0.01407  0.010
## Sequencing_batch                    1    0.0677 0.067720 1.03426 0.00505  0.352
## Residuals                         192   12.5716 0.065477         0.93772       
## Total                             203   13.4066                  1.00000       
##                                     
## stai_state_above_clin_cutoff_pw26   
## postnatal_stress                    
## maltreated_mod_M54                  
## mother_ethnicity                  . 
## deliv_mode                          
## only_bf_months                      
## antibio_M24                       **
## Sequencing_batch                    
## Residuals                           
## Total                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(wunifrac_all_covs) #calculates omega squared for first predictor; change order of vars to get omega squared for other predictors
## [1] -0.002660215
#unweighted unifrac
set.seed(main.seed)
unifrac_all_covs <- adonis(M24_unifrac_all~ maltreated_mod_M54+ stai_state_above_clin_cutoff_pw26 + postnatal_stress + mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all) 
unifrac_all_covs #print output
## 
## Call:
## adonis(formula = M24_unifrac_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 +      postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months +      antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## maltreated_mod_M54                  1    0.1320 0.13201  0.9285 0.00453  0.508
## stai_state_above_clin_cutoff_pw26   1    0.1330 0.13302  0.9356 0.00456  0.482
## postnatal_stress                    1    0.1383 0.13830  0.9727 0.00474  0.444
## mother_ethnicity                    2    0.2942 0.14711  1.0347 0.01009  0.390
## deliv_mode                          1    0.2002 0.20023  1.4083 0.00687  0.115
## only_bf_months                      3    0.3860 0.12868  0.9051 0.01324  0.666
## antibio_M24                         1    0.1168 0.11684  0.8218 0.00401  0.690
## Sequencing_batch                    1    0.4529 0.45294  3.1857 0.01554  0.001
## Residuals                         192   27.2981 0.14218         0.93641       
## Total                             203   29.1517                 1.00000       
##                                      
## maltreated_mod_M54                   
## stai_state_above_clin_cutoff_pw26    
## postnatal_stress                     
## mother_ethnicity                     
## deliv_mode                           
## only_bf_months                       
## antibio_M24                          
## Sequencing_batch                  ***
## Residuals                            
## Total                                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(unifrac_all_covs)
## [1] -0.0003470041
#bray-curtis
bc_all_covs <- adonis(M24_bc_all~ maltreated_mod_M54+stai_state_above_clin_cutoff_pw26 + postnatal_stress+ mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all) 
bc_all_covs #print output
## 
## Call:
## adonis(formula = M24_bc_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 +      postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months +      antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## maltreated_mod_M54                  1     0.275 0.27522 1.08687 0.00527  0.326
## stai_state_above_clin_cutoff_pw26   1     0.171 0.17082 0.67458 0.00327  0.837
## postnatal_stress                    1     0.158 0.15813 0.62448 0.00303  0.869
## mother_ethnicity                    2     0.776 0.38776 1.53129 0.01484  0.036
## deliv_mode                          1     0.238 0.23752 0.93797 0.00454  0.513
## only_bf_months                      3     1.014 0.33816 1.33543 0.01941  0.084
## antibio_M24                         1     0.370 0.37025 1.46216 0.00708  0.122
## Sequencing_batch                    1     0.640 0.63989 2.52699 0.01224  0.006
## Residuals                         192    48.619 0.25322         0.93031       
## Total                             203    52.261                 1.00000       
##                                     
## maltreated_mod_M54                  
## stai_state_above_clin_cutoff_pw26   
## postnatal_stress                    
## mother_ethnicity                  * 
## deliv_mode                          
## only_bf_months                    . 
## antibio_M24                         
## Sequencing_batch                  **
## Residuals                           
## Total                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(bc_all_covs)
## [1] 0.0004188679
#jaccard
jacc_all_covs <- adonis(M24_jaccard_all~maltreated_mod_M54+stai_state_above_clin_cutoff_pw26 +postnatal_stress+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_all) 
jacc_all_covs #print output
## 
## Call:
## adonis(formula = M24_jaccard_all ~ maltreated_mod_M54 + stai_state_above_clin_cutoff_pw26 +      postnatal_stress + mother_ethnicity + deliv_mode + only_bf_months +      antibio_M24 + Sequencing_batch, data = metadata_physeq_M24_all) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## maltreated_mod_M54                  1     0.352 0.35163 1.04855 0.00511  0.375
## stai_state_above_clin_cutoff_pw26   1     0.265 0.26487 0.78983 0.00385  0.813
## postnatal_stress                    1     0.252 0.25197 0.75136 0.00366  0.880
## mother_ethnicity                    2     0.923 0.46140 1.37589 0.01341  0.039
## deliv_mode                          1     0.333 0.33295 0.99285 0.00484  0.452
## only_bf_months                      3     1.228 0.40933 1.22062 0.01785  0.059
## antibio_M24                         1     0.422 0.42177 1.25774 0.00613  0.135
## Sequencing_batch                    1     0.653 0.65268 1.94630 0.00948  0.002
## Residuals                         192    64.386 0.33534         0.93567       
## Total                             203    68.813                 1.00000       
##                                     
## maltreated_mod_M54                  
## stai_state_above_clin_cutoff_pw26   
## postnatal_stress                    
## mother_ethnicity                  * 
## deliv_mode                          
## only_bf_months                    . 
## antibio_M24                         
## Sequencing_batch                  **
## Residuals                           
## Total                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(jacc_all_covs)
## [1] 0.000235455

Not Including Covariates

#NOT INCLUDING COVARIATES
#create a dataset with complete cases on all adversities
physeq_M24_all_ncovs <- subset_samples(physeq_M24, !is.na(postnatal_stress) & !is.na(maltreated_mod_M54) & !is.na(stai_state_above_clin_cutoff_pw26)) 

#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "unifrac", type = "samples")
M24_bc_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "bray", type = "samples")
M24_jaccard_all_ncovs <- phyloseq::distance(physeq_M24_all_ncovs, method = "jaccard", type = "samples")

#get metadata
metadata_physeq_M24_all_ncovs <- data.frame(sample_data(physeq_M24_all_ncovs)) #get data frame of metadata

#permanovas
#weighted unifrac
set.seed(main.seed)
wunifrac_all_ncovs <- adonis(M24_wunifrac_all_ncovs~ postnatal_stress+stai_state_above_clin_cutoff_pw26+maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
wunifrac_all_ncovs #print output
## 
## Call:
## adonis(formula = M24_wunifrac_all_ncovs ~ postnatal_stress +      stai_state_above_clin_cutoff_pw26 + maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## postnatal_stress                    1    0.0448 0.044811 0.66542 0.00321  0.698
## stai_state_above_clin_cutoff_pw26   1    0.0262 0.026193 0.38895 0.00188  0.917
## maltreated_mod_M54                  1    0.0627 0.062721 0.93137 0.00450  0.451
## Residuals                         205   13.8053 0.067343         0.99041       
## Total                             208   13.9391                  1.00000
omega_sq_adonis(wunifrac_all_ncovs)
## [1] -0.001608689
#unweighted unifrac
set.seed(main.seed)
unifrac_all_ncovs <- adonis(M24_unifrac_all_ncovs~stai_state_above_clin_cutoff_pw26+postnatal_stress+maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
unifrac_all_ncovs
## 
## Call:
## adonis(formula = M24_unifrac_all_ncovs ~ stai_state_above_clin_cutoff_pw26 +      postnatal_stress + maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## stai_state_above_clin_cutoff_pw26   1    0.1347 0.13467 0.93757 0.00451  0.506
## postnatal_stress                    1    0.1285 0.12850 0.89463 0.00431  0.546
## maltreated_mod_M54                  1    0.1291 0.12908 0.89866 0.00433  0.558
## Residuals                         205   29.4454 0.14364         0.98685       
## Total                             208   29.8376                 1.00000
omega_sq_adonis(unifrac_all_ncovs)
## [1] -0.0002990976
#bray-curtis
bc_all_ncovs <- adonis(M24_bc_all_ncovs~postnatal_stress+stai_state_above_clin_cutoff_pw26 +maltreated_mod_M54, data=metadata_physeq_M24_all_ncovs)
bc_all_ncovs
## 
## Call:
## adonis(formula = M24_bc_all_ncovs ~ postnatal_stress + stai_state_above_clin_cutoff_pw26 +      maltreated_mod_M54, data = metadata_physeq_M24_all_ncovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## postnatal_stress                    1     0.131 0.13080 0.50103 0.00242  0.944
## stai_state_above_clin_cutoff_pw26   1     0.163 0.16327 0.62543 0.00302  0.876
## maltreated_mod_M54                  1     0.292 0.29249 1.12042 0.00541  0.329
## Residuals                         205    53.516 0.26105         0.98916       
## Total                             208    54.102                 1.00000
omega_sq_adonis(bc_all_ncovs)
## [1] -0.002396033
#jaccard
jacc_all_ncovs <- adonis(M24_jaccard_all_ncovs~ postnatal_stress+maltreated_mod_M54+stai_state_above_clin_cutoff_pw26, data=metadata_physeq_M24_all_ncovs)
jacc_all_ncovs
## 
## Call:
## adonis(formula = M24_jaccard_all_ncovs ~ postnatal_stress + maltreated_mod_M54 +      stai_state_above_clin_cutoff_pw26, data = metadata_physeq_M24_all_ncovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## postnatal_stress                    1     0.233 0.23282 0.68098 0.00328  0.957
## maltreated_mod_M54                  1     0.356 0.35637 1.04239 0.00502  0.343
## stai_state_above_clin_cutoff_pw26   1     0.266 0.26574 0.77729 0.00375  0.856
## Residuals                         205    70.086 0.34188         0.98795       
## Total                             208    70.941                 1.00000
omega_sq_adonis(jacc_all_ncovs)
## [1] -0.001530048

Cumulative Adversity

Including Covariates

#get dataset
physeq_M24_cumulative <- subset_samples(physeq_M24, !is.na(cumulative_adv) & !is.na(only_bf_months) & !is.na(deliv_mode) & !is.na(mother_ethnicity) & !is.na(antibio_M24) & !is.na(Sequencing_batch)) 

#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "unifrac", type = "samples")
M24_bc_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "bray", type = "samples")
M24_jaccard_cumulative <- phyloseq::distance(physeq_M24_cumulative, method = "jaccard", type = "samples")

#get metadata
metadata_physeq_M24_cumulative <- data.frame(sample_data(physeq_M24_cumulative)) #get data frame of metadata

#test assumption of betadispersion
disp_cumulative<- betadisper(M24_wunifrac_cumulative, metadata_physeq_M24_cumulative$cumulative_adv)
permutest(disp_cumulative) #yes, apparently they do have homogeneity of dispersion
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
## Groups      2 0.01007 0.0050361 0.6625    999  0.513
## Residuals 201 1.52784 0.0076012
plot(disp_cumulative, hull=FALSE, ellipse=TRUE)

#permanovas 
#weighted unifrac
set.seed(main.seed)
wunifrac_cumulative_covs <- adonis(M24_wunifrac_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative) 
wunifrac_cumulative_covs
## 
## Call:
## adonis(formula = M24_wunifrac_cumulative ~ cumulative_adv + mother_ethnicity +      deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch,      data = metadata_physeq_M24_cumulative) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)  
## cumulative_adv     2    0.0905 0.045267 0.69075 0.00675  0.747  
## mother_ethnicity   2    0.2061 0.103066 1.57273 0.01538  0.095 .
## deliv_mode         1    0.0398 0.039844 0.60800 0.00297  0.725  
## only_bf_months     3    0.1683 0.056086 0.85584 0.01255  0.634  
## antibio_M24        1    0.1865 0.186534 2.84639 0.01391  0.016 *
## Sequencing_batch   1    0.0673 0.067325 1.02733 0.00502  0.358  
## Residuals        193   12.6480 0.065533         0.94341         
## Total            203   13.4066                  1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(wunifrac_cumulative_covs)
## [1] -0.003008632
#unweighted unifrac
set.seed(main.seed)
unifrac_cumulative_covs <- adonis(M24_unifrac_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
unifrac_cumulative_covs
## 
## Call:
## adonis(formula = M24_unifrac_cumulative ~ cumulative_adv + mother_ethnicity +      deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch,      data = metadata_physeq_M24_cumulative) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## cumulative_adv     2    0.3462 0.17309  1.2210 0.01188  0.164    
## mother_ethnicity   2    0.2942 0.14708  1.0375 0.01009  0.386    
## deliv_mode         1    0.1977 0.19766  1.3943 0.00678  0.123    
## only_bf_months     3    0.3831 0.12770  0.9008 0.01314  0.676    
## antibio_M24        1    0.1158 0.11577  0.8167 0.00397  0.695    
## Sequencing_batch   1    0.4552 0.45522  3.2112 0.01562  0.001 ***
## Residuals        193   27.3596 0.14176         0.93852           
## Total            203   29.1517                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(unifrac_cumulative_covs)
## [1] 0.002139351
#bray-curtis
bc_cumulative_covs <- adonis(M24_bc_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
bc_cumulative_covs
## 
## Call:
## adonis(formula = M24_bc_cumulative ~ cumulative_adv + mother_ethnicity +      deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch,      data = metadata_physeq_M24_cumulative) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## cumulative_adv     2     0.321 0.16065 0.63235 0.00615  0.953   
## mother_ethnicity   2     0.732 0.36623 1.44158 0.01402  0.059 . 
## deliv_mode         1     0.237 0.23706 0.93314 0.00454  0.520   
## only_bf_months     3     0.951 0.31700 1.24780 0.01820  0.144   
## antibio_M24        1     0.347 0.34676 1.36495 0.00664  0.161   
## Sequencing_batch   1     0.642 0.64175 2.52613 0.01228  0.005 **
## Residuals        193    49.031 0.25404         0.93819          
## Total            203    52.261                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(bc_cumulative_covs)
## [1] -0.003557018
#jaccard
jacc_cumulative_covs <- adonis(M24_jaccard_cumulative~cumulative_adv+mother_ethnicity+deliv_mode+only_bf_months+antibio_M24+Sequencing_batch, data=metadata_physeq_M24_cumulative)
jacc_cumulative_covs
## 
## Call:
## adonis(formula = M24_jaccard_cumulative ~ cumulative_adv + mother_ethnicity +      deliv_mode + only_bf_months + antibio_M24 + Sequencing_batch,      data = metadata_physeq_M24_cumulative) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                   Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)   
## cumulative_adv     2     0.481 0.24034 0.71504 0.00699  0.976   
## mother_ethnicity   2     0.892 0.44578 1.32628 0.01296  0.055 . 
## deliv_mode         1     0.333 0.33265 0.98969 0.00483  0.457   
## only_bf_months     3     1.171 0.39038 1.16144 0.01702  0.114   
## antibio_M24        1     0.413 0.41306 1.22892 0.00600  0.155   
## Sequencing_batch   1     0.654 0.65387 1.94538 0.00950  0.002 **
## Residuals        193    64.870 0.33611         0.94270          
## Total            203    68.813                 1.00000          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
omega_sq_adonis(jacc_cumulative_covs)
## [1] -0.002770185

Not Including Covariates

physeq_M24_cumulative_nocovs <- subset_samples(physeq_M24, !is.na(cumulative_adv)) 

#calculate distance matrices
set.seed(main.seed)
M24_wunifrac_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "wunifrac", type = "samples")
set.seed(main.seed)
M24_unifrac_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "unifrac", type = "samples")
M24_bc_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "bray", type = "samples")
M24_jaccard_cumulative_ncovs <- phyloseq::distance(physeq_M24_cumulative_nocovs, method = "jaccard", type = "samples")

#get metadata
metadata_physeq_M24_cumulative_nocovs <- data.frame(sample_data(physeq_M24_cumulative_nocovs)) #get data frame of metadata

#PERMANOVAS
#weighted unifrac
set.seed(main.seed)
wunifrac_cumulative_ncovs <- adonis(M24_wunifrac_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
wunifrac_cumulative_ncovs
## 
## Call:
## adonis(formula = M24_wunifrac_cumulative_ncovs ~ cumulative_adv,      data = metadata_physeq_M24_cumulative_nocovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)
## cumulative_adv   2    0.0966 0.048298 0.71876 0.00693  0.747
## Residuals      206   13.8425 0.067196         0.99307       
## Total          208   13.9391                  1.00000
omega_sq_adonis(wunifrac_cumulative_ncovs)
## [1] -0.002698585
#unweighted unifrac
set.seed(main.seed)
unifrac_cumulative_ncovs <- adonis(M24_unifrac_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
unifrac_cumulative_ncovs
## 
## Call:
## adonis(formula = M24_unifrac_cumulative_ncovs ~ cumulative_adv,      data = metadata_physeq_M24_cumulative_nocovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## cumulative_adv   2     0.328 0.16401  1.1449 0.01099  0.229
## Residuals      206    29.510 0.14325         0.98901       
## Total          208    29.838                 1.00000
omega_sq_adonis(unifrac_cumulative_ncovs)
## [1] 0.001384799
#bray-curtis
bc_cumulative_ncovs <- adonis(M24_bc_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
bc_cumulative_ncovs
## 
## Call:
## adonis(formula = M24_bc_cumulative_ncovs ~ cumulative_adv, data = metadata_physeq_M24_cumulative_nocovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)
## cumulative_adv   2     0.335 0.16765 0.64232 0.0062  0.946
## Residuals      206    53.767 0.26100         0.9938       
## Total          208    54.102                 1.0000
omega_sq_adonis(bc_cumulative_ncovs)
## [1] -0.003434516
#jaccard
jacc_cumulative_ncovs <- adonis(M24_jaccard_cumulative_ncovs~cumulative_adv, data=metadata_physeq_M24_cumulative_nocovs)
jacc_cumulative_ncovs
## 
## Call:
## adonis(formula = M24_jaccard_cumulative_ncovs ~ cumulative_adv,      data = metadata_physeq_M24_cumulative_nocovs) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## cumulative_adv   2     0.494 0.24695 0.72214 0.00696  0.982
## Residuals      206    70.447 0.34197         0.99304       
## Total          208    70.941                 1.00000
omega_sq_adonis(jacc_cumulative_ncovs)
## [1] -0.002666081

Differential Abundance (DA)

Filtering out low prevalence taxa:
In the literature, different filtering thresholds are used based on data features and research questions (e.g., whether rare taxa are considered important) (Carlson et al. (2021) Nature Communications).
For these analyses we chose to report the results after filtering at a moderate threshold 0.5%, and included a more conservative (0.1%) and more liberal (1%) threshold in the supplemental material:

  • at least 1% of samples have non-zero abundance (keeps 650 out of 1230 taxa; reported in supplement)
  • at least 0.5% of samples have non-zero abundance (keeps 773 out of 1230 taxa; reported in main text)
  • at least 0.1%of samples have non-zero abundance (keeps 1055 out of 1230 taxa, only removing taxa that are zero for all samples; reported in supplement)

Format data for DA analyses with MaAsLin2

Maaslin2 requires as input:
1. A tab-delimited metadata file (features as columns, samples as rows)
2. A tab-delimited feature file (taxonomy as columns, samples as rows)

#transform OTU counts to relative abundances
physeq_M24_relabund = transform_sample_counts(physeq_M24, function(x) x / sum(x))

#convert phyloseq OTU table to dataframe
M24_otus = as(otu_table(physeq_M24_relabund), "matrix") #convert from part of phyloseq object to matrix
M24_otus_df = as.data.frame(M24_otus) #convert to df

#provide name for first column (for merging)
M24_ra_otus_df <-
  M24_otus_df %>% 
  rownames_to_column(var = "Feature.ID")

#join taxonomy file to OTU table to get taxa names for OTUs
taxonomy_txt_path <- str_c(general_mb_path, "input_files/M2448_taxonomy_edited.tsv") #taxonomy file path
M2448_tax <- read.table(taxonomy_txt_path, header=TRUE, sep = "\t") #read in taxa

M24_ra_tax_features <-
  M2448_tax %>% 
  full_join(M24_ra_otus_df, by = "Feature.ID")

M24_ra_tax_features <-
  M24_ra_tax_features %>% 
  filter('Feature.ID' != "OTU948") #filter out one OTU/taxa which has NA values for all samples (filtered out for alpha diversity calculations in qiime2 preprocessing)

#remove OTU names
M24_ra_tax_features <-
  M24_ra_tax_features %>% 
  dplyr::select(
    -'Feature.ID'
  )

#transpose this taxonomy feature file so that taxonomy are columns
M24_ra_tax_features_t <- t(M24_ra_tax_features)
M24_ra_tax_features_t = setNames(data.frame(t(M24_ra_tax_features[,-1])), M24_ra_tax_features[,1])

#name sample ID column for matching with metadata in maaslin2
M24_ra_tax_features_t <-
  M24_ra_tax_features_t %>% 
  rownames_to_column(var = '#SampleID') 

#write new file combining taxonomy and OTU counts as .tsv
tax_features_ra_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_tax_features_ra.tsv"
write_tsv(M24_ra_tax_features_t, tax_features_ra_fp)
#also write these data to form B data folder for submission with GUSTO form B application
write_csv(M24_ra_tax_features_t, "../../manuscripts/fran_adversity_microbiome/Form_B_submission/data_updated/microbiome_taxonomy_and_counts.csv")

#read in metadata as dataframe
M24_metadata <- read_tsv(str_c(general_mb_path, "input_files/M24_metadata_typeheader_new.tsv"))

#remove typeheader so that variable type can be read correctly by R
M24_metadata <- M24_metadata[-1, ]

#write metadata file with M24 samples as .tsv
M24_meta_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.tsv"
write_tsv(M24_metadata, M24_meta_fp)
#manually open the .tsv and save it as .csv
csv_file <- "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.csv"
#read .csv into R -- after removing the typeheader, this will automatically assign each variable to the correct type
test <- read_csv(csv_file)

#manually assign correct variable type to binary variables that were read as double (numeric with decimal place) so that maaslin2 will treat those variables as categorical
test <- test %>% 
    dplyr::mutate(across(.cols = c(maltreated_mod_M54, stai_state_above_clin_cutoff_pw26, antibio_M24, probiotics_M24, cumulative_adv), ~as.character(.x)))

#create new descriptive variables for easier visualization when graphing
test <- test %>% 
  mutate(sex_binary_ch = case_when( 
    sex_binary == 1 ~ "male",
    sex_binary== 0 ~ "female"
  ),
  postnatal_stress_s = case_when(
    postnatal_stress==1 ~ "yes",
    postnatal_stress==0 ~ "no"
  ))

#remove variables that won't be used in maaslin2 analyses
test <- test %>% 
  dplyr::select(
    -contains("mom"),
    -sum_advrsty,
    -deliv_mode
  )

#write new metadata file with correct dadta types back to original file path
write_tsv(test, "../../data/analysis_datasets/fran/maaslin2/inputs/M24_metadata.tsv")

#load final metadata file for analysis below
M24_metadata <- read_tsv(M24_meta_fp)

Covariate Selection

Covariates will be chosen based on overlap in significantly different bacteria with adversity maaslin results without covariates at the genus level.

Possible covariates include: sequencing batch, antibiotics, probiotics, sex, delivery mode, ethnicity, duration of exclusive breastfeeding, income, gestational age at birth, age at stool sample proxy.

Same Maaslin2 default parameters as used above and 0.5% filtering threshold.

Child Sex

#child sex
maaslin2_sex_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/sex/sex_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("sex_binary_ch"),
  standardize = TRUE 
)

Antibiotic Use

#antibiotic use
maaslin2_antibiotics_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/antibiotics/antibiotiics_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("antibio_M24"),
  standardize = TRUE 
)

Probiotic Use

#probiotic use
maaslin2_probiotics_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/probiotics/probiotics_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("probiotics_M24"),
  standardize = TRUE
)

Delivery Mode

#delivery mode
maaslin2_delivmode_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/delivery_mode/delivmode_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("deliv_mode_str"),
  standardize = TRUE 
)

Child Ethnicity

#child ethnicity
maaslin2_ethnicity_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/ethnicity/ethnicity_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)' 
)

Age at Stool Sample

#age at stool sample
maaslin2_age_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/age/age_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("c_age_days_stoolproxy_m24"),
  standardize = TRUE
)

Income

#monthly household income per member
maaslin2_income_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/income/income_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25,
  analysis_method = "lm", 
  fixed_effects = c("monthly_income_per_member"),
  standardize = TRUE
)

Gestational Age at Birth

#gestational age at birth
maaslin2_ga_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/gestational_age/ga_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("GA"),
  standardize = TRUE
)

Duration of Exclusive Breastfeeding

#duration of exclusive breastfeeding
maaslin2_exclusivebf_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/duration_of_exclusive_bf/exclusivebf_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("only_bf_months"),
  standardize = TRUE,
  reference = 'only_bf_months, c(0, 1, 2, 4)' #provide reference level for variables with more than 2 categories
)

Duration of Any Breastfeeding

#duration of any breastfeeding
maaslin2_anybf_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/duration_of_any_bf/anybf_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("any_bf_months"),
  standardize = TRUE, 
  reference = 'any_bf_months, c(0, 1, 2, 3)'
)

Sequencing Batch

#sequencing batch
maaslin2_seqbatch_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/only_covs/sequencing_batch/seqbatch_0.5p_results",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("Sequencing_batch"),
  standardize = TRUE 
)

Significant covariates for maaslin2 adversity analyses:

  • Preconception adversity: Duration of exclusive breastfeeding, child ethnicity, and sequencing batch
  • Prenatal adversity: Duration of exclusive breastfeeding, probiotic use, child ethnicity, and sequencing batch
  • Postnatal adversity: Child ethnicity
  • All adversities together: Duration of exclusive breastfeeding, child ethnicity, probiotic use, sequencing batch, and child age at stool sample
  • Cumulative adversity: none

Without Covariates

All Adversity Exposures

Note: to perform the MaAsLin2 analyses, both an OTU file with all of the microbiome data, and a metadata file with all of the variables that will be associated with the microbiome data are needed. At the time of running these analyses, MaAsLin2 calculated the sample size, and the sample size with non-zero taxa abundance based on the OTU/taxonomy file. However, when a sample was missing some metadata, that sample would not be included in the analysis with the metadata variable (due to listwise deletion) and thus, the sample sizes would differ from that of the OTU file. This issue was brought to the attention of the developers. A simple fix to this problem would be to run MaAsLin2 on datasets with complete cases, to obtain the correct sample size. However, as we wanted to filter the data for non-zero abundance based on all samples with usable gut microbiome data, instead of subsets of the dataset based on metadata availability, we took the approach of running MaAsLin2 with all the cases included (in which instance, the sample sizes reported are incorrect) and again with complete cases for each metadata variable (in which instance the filtering for non zero abundance and thus, the results are incorrect, but the sample sizes are correct)

To reduce space, instead of writing out each version separately below, we ran each code chunk twice, swapping out the metadata file for all vs. complete cases and changing the folder name for the results (adding “_allcases” and “_completecases” for 1 and 2 respectively). Analysis objects below are saved using the all cases version for use in visualizations later, as that is the version with the coefficient estimates that we used.

#ALL ADVERSITIES TOGETHER
#check N for all adversities analyses
alpha_diversity_stress_usable %>% dplyr::count(
  !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(postnatal_stress) #N is 205
)

#create complete cases dataset to determine N and N.not.0
alladv_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(postnatal_stress)) 
write_tsv(alladv_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv_nocovs.tsv")
alladv_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv_nocovs.tsv"

#All adversities -- 0.5% filtering
maaslin2_alladv_nocov_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/all_adversities/alladv_nocovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("postnatal_stress", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
  standardize = TRUE 
)

Cumulative Adversity

#CUMULATIVE ADVERSITY
#create complete cases dataset to determine N and N.not.0
cumulative_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cumulative_adv_s)) 
write_tsv(cumulative_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_cumulative_nocovs.tsv")
cumulative_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_cumulative_nocovs.tsv"

#Cumulative adversity -- 1% filtering
maaslin2_cumulative_nocov_1 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/adversity_cumulativeicity/cumulative_nocovs_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("cumulative_adv_s"),
  standardize = TRUE, 
  reference = c("cumulative_adv_s,0 tp") #provide reference level for variables with more than 2 categories
)

#Cumulative adversity -- 0.5% filtering
maaslin2_cumulative_nocov_0.5 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/cumulative_adversity/cumulative_nocovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm",
  fixed_effects = c("cumulative_adv_s"),
  standardize = TRUE, 
  reference = c("cumulative_adv_s,0 tp") 
)

#Cumulative adversity -- 0.1% filtering
maaslin2_cumulative_nocov_0.1 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/adversity_cumulativeicity/cumulative_nocovs_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cumulative_adv_s"),
  standardize = TRUE, 
  reference = c("cumulative_adv_s,0 tp")
)

Each Adversity Exposure

Preconception Adversity

#PRECONCEPTION ADVERSITY
#create complete cases dataset to determine correct N and N.not.0
precon_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(ctq_total_score_M54)) #N=279
write_tsv(precon_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon_nocovs.tsv")
precon_meta_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon_nocovs.tsv"
  
#preconception adversity -- 0.5% filtering
maaslin_mothermalt_nocov_05 <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/mother_maltreatment/maltreated_nocovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005, #0.5% filtering threshold
  max_significance = 0.25, #q-value threshold recommended for biomarker discovery
  #defaults: normalization = TSS and transform = log
  analysis_method = "lm", #default
  fixed_effects = c("ctq_total_score_M54"),
  
  standardize = TRUE #this option only affects the analysis when there is greater than one predictor
)

Prenatal Adversity

#PRENATAL ADVERSITY
#create complete cases dataset to determine N and N.not.0
prenat_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(STAI_prorated_state_pw26)) 
write_tsv(prenat_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat_nocovs.tsv")
prenat_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat_nocovs.tsv"

#Prenatal adversity -- 0.5% filtering
maaslin_prenatalanx_nocov_05 <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/no_covs/prenatal_anxiety/prenatalanx_nocovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005, 
  max_significance = 0.25,
  analysis_method = "lm",
  fixed_effects = c("STAI_prorated_state_pw26"),
  standardize = TRUE 
)

Postnatal Adversity

#POSTNATAL ADVERSITY
postnat_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(STAI_prorated_state_pw26)) 
write_tsv(postnat_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat_nocovs.tsv")
postnat_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat_nocovs.tsv"

#postnatal adversity - 0.5% filtering
maaslin2_postnatalstress_nocov_05 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/no_covs/postnatal_stress/poststress_nocovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("postnatal_stress"),
  standardize = TRUE 
)

Including Covariates

All Adversity Exposures

The same process as described in the “without covariates” section is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters as used above, and 0.1%, 0.5%, and 1% filtering thresholds are used.

#ALL ADVERSITIES TOGETHER
#create all adversity metadata
alladv_metadata <- M24_metadata %>% 
  filter(!is.na(postnatal_stress) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(probiotics_M24) & !is.na(Sequencing_batch) & !is.na(c_age_years_stoolproxy_m24)) #N=196
write_tsv(alladv_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv.tsv")
alladv_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_alladv.tsv"

glimpse(alladv_metadata)

#All adversities -- 1% filtering threshold
maaslin2_alladv_withcov_1 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#All adversities -- 0.5% filtering threshold
maaslin2_alladv_withcov_05 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_0.5p_results_allcases_updated",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25,
  analysis_method = "lm", 
  fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#All adversities -- 0.1% filtering threshold
maaslin2_alladv_withcov_01 = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = alladv_metadata_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/all_adversities/alladv_wcovs_0.1p_results_completecases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm",
  fixed_effects = c("postnatal_stress_s", "STAI_prorated_state_pw26", "ctq_total_score_M54", "mother_ethnicity", "only_bf_months", "c_age_years_stoolproxy_m24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

Cumulative Adversity

There were no selected covariates for cumulative adversity, so we only ran analyses for cumulative adversity not including covariates.

Each Adversity Exposure

Preconception Adversity

#PRECONCEPTION ADVERSITY
#create complete metadata files to be used for these
precon_metadata <- M24_metadata %>% 
  filter(!is.na(ctq_total_score_M54) & !is.na(Sequencing_batch) & !is.na(only_bf_months) & !is.na(mother_ethnicity)) 
write_tsv(precon_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon.tsv")
precon_meta_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_precon.tsv"

#preconception adversity - 1% filtering threshold
maaslin_maltreatment_covs_1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/with_covs/mother_maltreatment/mothermalt_wcovs_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
  standardize = TRUE, #because we have multiple continuous metadata variables
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#preconception adversity -- 0.5% filtering threshold
maaslin_maltreatment_covs_0.5p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/mother_maltreatment/mothermalt_wcovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#preconception adversity -- 0.1% filtering threshold
maaslin_maltreatment_covs_0.1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = precon_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/mother_maltreatment/mothermalt_wcovs_0.1p_results_completecases",
  min_abundance = 0,
  min_prevalence = 0.001, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "only_bf_months", "ctq_total_score_M54", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

Prenatal Adversity

#PRENATAL ADVERSITY
#create prenatal adversity metadata
prenat_metadata <- M24_metadata %>% 
  filter(!is.na(STAI_prorated_state_pw26) & !is.na(Sequencing_batch) & !is.na(only_bf_months) & !is.na(probiotics_M24) & !is.na(mother_ethnicity)) #N=420
write_tsv(prenat_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat.tsv")
prenat_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_prenat.tsv"


#prenatal adversity -- 1% filtering threshold 
maaslin_prenatanx_covs_1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#prenatal adversity -- 0.5% filtering threshold 
maaslin_prenatanx_covs_0.5p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005, 
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

#prenatal adversity -- 0.1% filtering threshold 
maaslin_prenatanx_covs_0.1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/prenatal_anxiety/prenatanx_wcovs_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "only_bf_months", "probiotics_M24", "STAI_prorated_state_pw26", "Sequencing_batch"),
  standardize = TRUE, 
  reference = c('mother_ethnicity, c(0, 1, 2)', 'only_bf_months, c(3, 4, 5)') 
)

Postnatal Adversity

#POSTNATAL ADVERSITY
#create postnatal adversity metadata
postnat_metadata <- M24_metadata %>% 
  filter(!is.na(postnatal_stress) & !is.na(mother_ethnicity)) #N=310
write_tsv(postnat_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat.tsv")
postnat_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_postnat.tsv"

#postnatal adversity -- 1% filtering threshold
maaslin_poststress_covs_1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm",
  fixed_effects = c("mother_ethnicity", "postnatal_stress_s"),
  standardize = TRUE,
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

#Postnatal adversity -- 0.05% filtering threshold
maaslin_poststress_covs_0.5p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "postnatal_stress_s"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)' 
)

#Postnatal adversity -- 0.01% filtering threshold
maaslin_poststress_covs_0.1p <- Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/adversity/with_covs/postnatal_stress/poststress_wcovs_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001, 
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("mother_ethnicity", "postnatal_stress"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)' 
)

Microbiome associations with child socioemotional functioning (SEF)

Determine which SEF domains are correlated with adversities

Note: within subsample of participants that have microbiome data

#select adversity variables from usable dataset
cbcl_adversity <- alpha_diversity_stress_usable %>% 
  dplyr::select(
    subjid,
    ctq_total_score_M54,
    STAI_prorated_state_pw26,
    postnatal_stress,
    cumulative_adv
  )

#select all CBCL variables of interest from Y2 and Y4 score datasets
cbcl_Y4 <- read_csv(beh4_file) %>% 
  dplyr::select(
    subjid,
    contains("std")
  )

cbcl_Y2 <- read_csv(beh2_file) %>% 
  dplyr::select(
    subjid,
    contains("std")
  )

#look at correlations between cbcl subscales
cbcl_nosub <-
  child_beh %>%
  dplyr::select(-subjid)

corPlot(cbcl_nosub,
        numbers = TRUE,
        stars = TRUE,
        show.legend = TRUE,
        cex.axis = 0.8,
        cex = 0.5) 

#join cbcl into adversity variables - separately for each timepoint
cbcl2_adversity <- cbcl_adversity %>% 
  left_join( #note: doing left join bc we only care about cbcl within mb subsample
    cbcl_Y2, by = "subjid"
  ) 

cbcl4_adversity <- cbcl_adversity %>% 
  left_join(
    cbcl_Y4, by = "subjid"
  )

#create datasets with just continuous vars
cbcl2_adversity_cor <- cbcl2_adversity %>% 
  dplyr::select(
    -subjid,
    -postnatal_stress,
    -cumulative_adv
  )

cbcl4_adversity_cor <- cbcl4_adversity %>% 
  dplyr::select(
    -subjid,
    -postnatal_stress,
    -cumulative_adv
  )

#visualize cbcl year 2 correlations
corPlot(cbcl2_adversity_cor,
        numbers=TRUE,
        main = "",
        stars=TRUE,
        show.legend=TRUE,
        diag=FALSE,
        upper=FALSE,
        scale = TRUE,
        xlas=2,
        ylas=2,
#        gr=gr,
        cex.axis = 0.8,
        cex = 0.5)

#determine correlation values and which are significant after multiple comparison correction
cbcl2_cors <- corr.test(cbcl2_adversity_cor, method="pearson", use = "pairwise", adjust="BH")
View(cbcl2_cors$ci2) #view adjusted p-values. Note that these don't show the labels for which correlation it is. So determine the 'adjusted' p-value cutoff w/respect to the unadjusted p-values, and then go off of that on the df$ci dataframe

cbcl4_cors <- corr.test(cbcl4_adversity_cor, method="pearson", use = "pairwise", adjust="BH")
View(cbcl4_cors$ci2) #view adjusted pvalues

#visualize cbcl year 4 correlations
corPlot(cbcl4_adversity_cor,
        numbers=TRUE,
        main = "",
        stars=TRUE,
        show.legend=TRUE,
        diag=FALSE,
        upper=FALSE,
        scale = TRUE,
        xlas=2,
        ylas=2,
#        gr=gr,
        cex.axis = 0.8,
        cex = 0.5)

#t-tests for postnatal adversity and cbcl at age 2 years
#grab only needed variables
cbcl2_postnatal <- cbcl2_adversity %>% 
  dplyr::select(
    postnatal_stress,
    STAI_prorated_state_pw26,
    ctq_total_score_M54,
    contains("cbcl")
  )
#put data in long format to prep
cbcl2_postnatal.longer <- cbcl2_postnatal %>%
  pivot_longer(-postnatal_stress, names_to = "variables", values_to = "value")
cbcl2_postnatal.longer %>% sample_n(6)

#run t-tests
stat.test <- cbcl2_postnatal.longer %>%
  group_by(variables) %>%
  t_test(value ~ postnatal_stress) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
stat.test

#t-tests for postnatal adversity and cbcl outcomes year 4
#grab only needed variables
cbcl4_postnatal <- cbcl4_adversity %>% 
  dplyr::select(
    postnatal_stress,
    contains("cbcl")
  )
#put data in long format to prep
cbcl4_postnatal.longer <- cbcl4_postnatal %>%
  pivot_longer(-postnatal_stress, names_to = "variables", values_to = "value")
cbcl4_postnatal.longer %>% sample_n(6)

#run t-tests
stat.test <- cbcl4_postnatal.longer %>%
  group_by(variables) %>%
  t_test(value ~ postnatal_stress) %>%
  adjust_pvalue(method = "BH") %>%
  add_significance()
stat.test


#anovas for cumulative adversity and cbcl outcomes at year 2 and  year 4
cbcl2_cumulative <- cbcl2_adversity %>% #grab only needed variables
  dplyr::select(
    cumulative_adv,
    contains("cbcl")
  )

cbcl4_cumulative <- cbcl4_adversity %>% #grab only needed variables
  dplyr::select(
    cumulative_adv,
    contains("cbcl")
  )

#put data in long format to prep
cbcl2_cumulative.longer <- cbcl2_cumulative %>%
  pivot_longer(-cumulative_adv, names_to = "variables", values_to = "value")

cbcl4_cumulative.longer <- cbcl4_cumulative %>%
  pivot_longer(-cumulative_adv, names_to = "variables", values_to = "value")

#run anovas for 2 year cbcl
anovas_cbcl_2 <- cbcl2_cumulative.longer %>% 
  nest(data=(c(cumulative_adv, value))) %>% 
  mutate(model = map(data, ~anova(lm(value ~ cumulative_adv, .))), 
         tidy = map(model, broom::tidy)) %>% 
dplyr::select(variables, tidy) %>% 
unnest(cols=c(tidy)) 

anovas_cbcl_4 <- cbcl4_cumulative.longer %>% 
  nest(data=(c(cumulative_adv, value))) %>% 
  mutate(model = map(data, ~anova(lm(value ~ cumulative_adv, .))), 
         tidy = map(model, broom::tidy)) %>% 
dplyr::select(variables, tidy) %>% 
unnest(cols=c(tidy)) 

#adjust p-values
anovas_cbcl_2$BH <- p.adjust(test$p.value, method="BH")
anovas_cbcl_4$BH <- p.adjust(test$p.value, method="BH")

Adversities Associated with SEF Domains Age 2 Years:

  • Internalizing Problems: preconception adversity, prenatal adversity
  • Total Problems: preconception adversity, prenatal adversity
  • Developmental Problems: prenatal adversity

Other domains were also associated with adversities (e.g., anxious-depressed problems), but since these could be subsumed within internalizing symptoms, we chose to examine internalizing problems only.

Age 4 Years:

  • ADHD Problems: preconception adversity, prenatal adversity
  • Attention Problems: preconception adversity, prenatal adversity
  • Developmental Problems: prenatal adversity
  • Externalizing Problems: preconception adversity, prenatal adversity
  • Internalizing Problems: prenatal adversity
  • Sleep Problems: prenatal adversity
  • Total Problems: preconception adversity, prenatal adversity

In the same manner as for Age 2 Years variables, we selected the broadest child SEF domains from significant results to test for associations with gut microbiome composition.

Differential Abundance

SEF at Age 2 years – Without Covariates

Note: Covariates will be chosen based on overlap in these results (at the genus level) with the DA analyses for possible covariates that were run in the “Covariate Selection” chunks above.

The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well.

Internalizing Problems

#INTERNALIZING PROBLEMS
#create complete cases dataset to determine N and N.not.0
int2_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbclintprobtot_m24_std) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=151
write_tsv(int2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int2_nocovs.tsv")
int2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int2_nocovs.tsv"

# Internalizing problems - 0.5% filtering threshold
maaslin_int_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/int_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("cbclintprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26"), #correlated adversity variables are included
  standardize = TRUE
)

Developmental Problems

#DEVELOPMENTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
dev2_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbcldevprobtot_m24_std) &!is.na(STAI_prorated_state_pw26))
write_tsv(dev2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev2_nocovs.tsv")
dev2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev2_nocovs.tsv"

#developmental problems - 0.5% filtering threshold
maaslin_dev2_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/dev_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm",
  fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26"),
  standardize = TRUE
)

Total Problems

#TOTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
tot2_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbcltotprobtot_m24_std) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26)) #N=151
write_tsv(tot2_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot2_nocovs.tsv")
tot2_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot2_nocovs.tsv"

#Total problems - 0.5% filtering threshold
maaslin_tot_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/no_covs/tot_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26"),
  standardize = TRUE 
)

Selected covariates:

  • Internalizing problems at age 2 years: none
  • Total problems at age 2 years: sequencing batch,
  • Developmental at age 2 years: delivery mode, duration of exclusive breastfeeding, child ethnicitiy
    A priori covariates: Child sex, any associated adversities

SEF at Age 4 years – Without Covariates

ADHD Problems

#ADHD PROBLEMS
adhd4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbcladhdprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(adhd4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhd4_nocovs.tsv")
adhd4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhd4_nocovs.tsv"

#ADHD problems - 0.5% filtering threshold
maaslin_adhd4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/adhd_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25,
  analysis_method = "lm", 
  fixed_effects = c("cbcladhdprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
  standardize = TRUE 
)

Attention Problems

#ATTENTION PROBLEMS
attn4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(attn4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attn4_nocovs.tsv")
attn4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attn4_nocovs.tsv"

#Attn problems - 0.5% filtering threshold
maaslin_att4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/attn_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbclattprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
  standardize = TRUE 
)

Developmental Problems

#DEVELOPMENTAL PROBLEMS
#create complete cases dataset to determine N and N.not.0
dev4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbcldevprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(dev4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev4_nocovs.tsv")
dev4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_dev4_nocovs.tsv"

#Dev problems - 0.5% filtering threshold
maaslin_dev4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = dev4_metadata_nocovs_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/dev_raw_0.5p_results_nocovs_adversities_completecases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcldevprobtot_y4_std", "STAI_prorated_state_pw26"),
  standardize = TRUE 
)

Externalizing Problems

#EXTERNALIZING PROBLEMS
ext4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbclextprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(ext4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_ext4_nocovs.tsv")
ext4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_ext4_nocovs.tsv"

#Externalizing - 0.5% filtering threshold
maaslin_ext4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/ext_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25,
  analysis_method = "lm", #default
  fixed_effects = c("cbclextprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
  standardize = TRUE 
  
)

Internalizing Problems

#INTERNALIZING PROBLEMS
#create complete cases dataset to determine N and N.not.0
int4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbclintprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(int4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int4_nocovs.tsv")
int4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_int4_nocovs.tsv"

#Internalizing problems - 0.5% filtering threshold
maaslin_int4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/int_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("cbclintprobtot_y4_std", "STAI_prorated_state_pw26"),
  standardize = TRUE
)

Sleep Problems

#SLEEP PROBLEMS
#create complete cases dataset to determine N and N.not.0
sleep4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbclsleepprobtot_y4_std) & !is.na(STAI_prorated_state_pw26)) #N=289
write_tsv(sleep4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleep4_nocovs.tsv")
sleep4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleep4_nocovs.tsv"

#Sleep problems - 0.5% filtering threshold
maaslin_sleep4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = sleep4_metadata_nocovs_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/sleep_raw_0.5p_results_nocovs_adversities_completecases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbclsleepprobtot_y4_std", "STAI_prorated_state_pw26"),
  standardize = TRUE 
)

Total Problems

#TOTAL PROBLEMS
tot4_metadata_nocovs <- M24_metadata %>% 
  filter(!is.na(cbcltotprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(ctq_total_score_M54))
write_tsv(tot4_metadata_nocovs, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot4_nocovs.tsv")
tot4_metadata_nocovs_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_tot4_nocovs.tsv"

#Total problems - 0.5% filtering threshold
maaslin_tot4_0.5p_nocovs = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/no_covs/tot_raw_0.5p_results_nocovs_adversities_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("cbcltotprobtot_y4_std", "STAI_prorated_state_pw26", "ctq_total_score_M54"),
  standardize = TRUE 
)

Selected covariates:

  • adhd problems y4: none
  • attention probelms y4: none
  • developmental problems y4: none
  • externalizing problems y4: none
  • internalizing problems y4: duration of exclusive breastfeeding, child ethnicity, sequencing batch
  • sleep problems y4: duration of exclusive breastfeeding, child ethnicity, probiotic use, sequencing batch
  • total problems y4: none
    A prior covariates: child sex, any associated adversities

SEF at Age 2 years – Including Covariates

The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well. #### Developmental Problems

#DEVELOPMENTAL PROBLEMS
dev2_metadata <- M24_metadata %>% 
  filter(!is.na(cbcldevprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(deliv_mode_str) & !is.na(only_bf_months) & !is.na(Sequencing_batch)) #N=166
write_tsv(dev2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devM24.tsv")
dev2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devM24.tsv" 

#developmental problems - 1% filtering threshold
maaslin_dev2_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
  standardize = TRUE
) 

#developmental problems - 0.5% filtering threshold
maaslin_dev2_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
  standardize = TRUE
) 
  
#developmental problems - 0.1% filtering threshold
maaslin_dev2_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/dev_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcldevprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch", "deliv_mode_str", "only_bf_months", "Sequencing_batch"),
  standardize = TRUE 
)

Internalizing Problems

#INTERNALIZING PROBLESM
#create metadata file with complete cases for internalizing
int2_metadata <- M24_metadata %>% 
  filter(!is.na(cbclintprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary)) #N=171
write_tsv(int2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intM24.tsv")
int2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intM24.tsv"

# Internalizing problems - 0.5% filtering threshold
maaslin_int_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
  standardize = TRUE 
)

# Internalizing problems - 1% filtering threshold
maaslin_int_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25,
  analysis_method = "lm", #default
  fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
  standardize = TRUE
)

# Internalizing problems - 0.1% filtering threshold
maaslin_int_0.1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/int_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbclintprobtot_m24_std", "STAI_prorated_state_pw26", "sex_binary_ch"),
  standardize = TRUE
)

Total Problems

#TOTAL PROBLEMS
#create metadata file with complete cases for total
tot2_metadata <- M24_metadata %>% 
  filter(!is.na(cbcltotprobtot_m24_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(Sequencing_batch) & !is.na(ctq_total_score_M54)) #N=150
write_tsv(tot2_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totM24.tsv")
tot2_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totM24.tsv" 

#Total problems - 0.5% filtering threshold
maaslin_tot_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
  standardize = TRUE
)

#Total problems - 1% filtering threshold
maaslin_tot_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
  standardize = TRUE 
)

#Total problems - 0.1% filtering threshold
maaslin_tot_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/m24/with_covs/tot_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("cbcltotprobtot_m24_std", "ctq_total_score_M54", "STAI_prorated_state_pw26", "sex_binary_ch", "Sequencing_batch"),
  standardize = TRUE 
)

SEF at Age 4 years – Including Covariates

The same process as described in the adversity differential abundance sections is followed, in which analyses are run once with all cases (to get coefficient estimates) and once with complete cases (to get correct N and N.not.0). The same Maaslin2 default parameters are used as well. #### ADHD Problems

#ADHD PROBLEMS
adhd4_metadata <- M24_metadata %>% 
  filter(!is.na(cbcladhdprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=201
write_tsv(adhd4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhdY4.tsv")
adhd4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_adhdY4.tsv" 

#ADHD problems - 0.5% filtering threshold
maaslin_adhd4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#ADHD problems - 1% filtering threshold
maaslin_adhd4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm",
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#ADHD problems - 0.1% filtering
maaslin_adhd4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/adhd_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcladhdprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

Attention Problems

#ATTENTION PROBLEMS
attn4_metadata <- M24_metadata %>% 
  filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=202
write_tsv(attn4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attnY4.tsv")
attn4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_attnY4.tsv" 

#Attention problems - 0.5% filtering threshold
maaslin_att4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Attention problems - 1% filtering threshold
maaslin_att4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Attention problems - 0.1% filtering threshold
maaslin_att4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/attn_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclattprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

Developmental Problems

#DEVELOPMENTAL PROBLEMS
dev4_metadata <- M24_metadata %>% 
  filter(!is.na(cbcldevprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary)) #N=289
write_tsv(dev4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devY4.tsv")
dev4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_devY4.tsv" 

#Developmental problems - 0.5% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Developmental problems - 0.1% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = dev4_metadata_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.1p_results_completecases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Developmental problems - 1% filtering threshold
maaslin_dev4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = dev4_metadata_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/dev_raw_0.1p_results_completecases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbcldevprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

Externalizing Problems

#EXTERNALIZING PROBLEMS
ext4_metadata <- M24_metadata %>% 
  filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(ctq_total_score_M54)) #N=202
write_tsv(ext4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_extY4.tsv")
ext4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_extY4.tsv" 

#Externalizing - 0.5% filtering threshold
maaslin_ext4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Externalizing - 1% filtering threshold
maaslin_ext4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Externalizing - 0.1% filtering threshold
maaslin_ext4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/ext_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbclextprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

Internalizing Problems

#INTERNALIZING PROBLEMS
#get complete cases metadata file
int4_metadata <- M24_metadata %>% 
  filter(!is.na(cbclintprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(only_bf_months) & !is.na(Sequencing_batch) & !is.na(mother_ethnicity)) #N=282
write_tsv(int4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intY4.tsv")
int4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_intY4.tsv" 

#Internalzing problems - 0.5% filtering threshold
maaslin_int4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

#Internalzing problems - 1% filtering threshold
maaslin_int4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

#Internalzing problems - 0.1% filtering threshold
maaslin_int4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/int_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbclintprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

Sleep Problems

#SLEEP PROBLEMS
sleep4_metadata <- M24_metadata %>% 
  filter(!is.na(cbclattprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) & !is.na(only_bf_months) & !is.na(mother_ethnicity) & !is.na(probiotics_M24) & !is.na(Sequencing_batch)) #N=276
write_tsv(sleep4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleepY4.tsv")
sleep4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_sleepY4.tsv" 

#Sleep problems - 0.5% filtering threshold
maaslin_sleep4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

#Sleep problems - 1% filtering threshold
maaslin_sleep4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

#Sleep problems - 0.1% filtering threshold
maaslin_sleep4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/sleep_raw_0.1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", #default
  fixed_effects = c("STAI_prorated_state_pw26", "cbclsleepprobtot_y4_std", "sex_binary_ch", "only_bf_months", "mother_ethnicity", "probiotics_M24", "Sequencing_batch"),
  standardize = TRUE, 
  reference = 'mother_ethnicity, c(0, 1, 2)'
)

Total Problems

#TOTAL PROBLEMS
tot4_metadata <- M24_metadata %>% 
  filter(!is.na(cbcltotprobtot_y4_std) & !is.na(STAI_prorated_state_pw26) & !is.na(sex_binary) ) #N=288
write_tsv(tot4_metadata, "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totY4.tsv")
tot4_metadata_fp <- "../../data/analysis_datasets/fran/maaslin2/inputs/metadata/metadata_totY4.tsv" 


#Total problems - 0.5% filtering threshold
maaslin_tot4_0.5p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_0.5p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.005,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Total problems - 1% filtering threshold
maaslin_tot4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = M24_meta_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_1p_results_allcases",
  min_abundance = 0,
  min_prevalence = 0.01,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

#Total problems - 0.1% filtering threshold
maaslin_tot4_1p = Maaslin2(
  input_data = tax_features_ra_fp,
  input_metadata = tot4_metadata_fp,
  output = "../../data/analysis_datasets/fran/maaslin2/final_results/cbcl/y4/with_covs/tot_raw_0.1p_results_completecases",
  min_abundance = 0,
  min_prevalence = 0.001,
  max_significance = 0.25, 
  analysis_method = "lm", 
  fixed_effects = c("STAI_prorated_state_pw26", "ctq_total_score_M54", "cbcltotprobtot_y4_std", "sex_binary_ch"),
  standardize = TRUE 
)

Consolidate main differential abundance results into one dataset

For graphing below
Only including taxa with N.not.0 >= 15% of all samples in the analysis, and datasets at 0.5% filtering threshold

#Combine Adversity Results
#find cutoff Ns for each analysis
ctq_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(ctq_total_score_M54) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(Sequencing_batch)) 
stai_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(STAI_prorated_state_pw26) & !is.na(mother_ethnicity) & !is.na(only_bf_months) & !is.na(Sequencing_batch) & !is.na(probiotics_M24)) 
leq_n <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(mother_ethnicity)) 
all_adv <- alpha_diversity_stress_usable %>% dplyr::count(!is.na(postnatal_stress) & !is.na(ctq_total_score_M54) & !is.na(STAI_prorated_state_pw26) & !is.na(mother_ethnicity) &!is.na(only_bf_months)&!is.na(Sequencing_batch) &!is.na(c_age_years_stoolproxy_m24))
cumu_n <-  alpha_diversity_stress_usable %>% dplyr::count(!is.na(cumulative_adv))

#grab results of interest from all cases run (so N and N.not.0 are incorrect, but coefficient estimates are correct)
ctq_results <- maaslin_maltreatment_covs_0.5p$results %>% 
  dplyr::filter(metadata=="ctq_total_score_M54" & qval < .25 & N.not.zero >= .15*ctq_n$n[2]) %>% 
  mutate(
    analysis = "preconception"
  )
stai_results <- maaslin_prenatanx_covs_0.5p$results %>% 
  dplyr::filter(metadata=="STAI_prorated_state_pw26" & qval < .25 & N.not.zero >= .15*stai_n$n[2]) %>% 
  mutate(
    analysis = "prenatal"
  )
leq_results <- maaslin_poststress_covs_0.5p$results %>% 
  dplyr::filter(metadata=="postnatal_stress_s" & qval < .25 & N.not.zero >= .15*leq_n$n[2]) %>% 
  mutate(
    analysis = "postnatal"
  )
alladv_results <- maaslin2_alladv_withcov_05$results %>% 
  dplyr::filter((metadata=="ctq_total_score_M54" | metadata=="STAI_prorated_state_pw26" | metadata=="postnatal_stress_s") & qval < .25 & N.not.zero >=.15*all_adv$n[2]) %>% 
  mutate(
    analysis = "all_adv"
  )
#no significant results for cumulative, so do not include here

adversity_results <- ctq_results %>% 
  bind_rows(stai_results) %>% 
  bind_rows(leq_results) %>% 
  bind_rows(alladv_results)

#Combine Socioemotional Functioning Results
#Ns hardcoded from complete cases analyses to save space

#grab results of interest from all cases run (so N and N.not.0 are incorrect, but coefficient estimates are correct)
maaslin_int2_results <- maaslin_int_0.5p$results %>%
  dplyr::filter(metadata=="cbclintprobtot_m24_std" & qval < .25 & N.not.zero >= .15*140)  %>%
  dplyr::mutate(analysis = "internalizing symptoms at age 2 years")

maaslin_dev2_results <- maaslin_dev2_0.5p$results %>% 
  dplyr::filter(metadata=="cbcldevprobtot_m24_std" & qval < .25 & N.not.zero >= .15*166)  %>% 
  dplyr::mutate(analysis = "developmental problems at age 2 years")

maaslin_tot2_results <- maaslin_tot_0.5p$results %>% 
  dplyr::filter(metadata=="cbcltotprobtot_m24_std" & qval < .25 & N.not.zero >= .15*150)  %>% 
  dplyr::mutate(analysis = "total problems at age 2 years")

maaslin_tot4_results <- maaslin_tot4_0.5p$results %>%
  dplyr::filter(metadata=="cbcltotprobtot_y4_std" & qval < .25 & N.not.zero >= .15*289)  %>%
  dplyr::mutate(analysis = "total problems at age 4 years")

maaslin_att4_results <- maaslin_att4_0.5p$results %>%
  dplyr::filter(metadata=="cbclattprobtot_y4_std" & qval < .25 & N.not.zero >= .15*202)  %>%
  dplyr::mutate(analysis = "attention problems at age 4 years")

maaslin_ext4_results <- maaslin_ext4_0.5p$results %>%
  dplyr::filter(metadata=="cbclextprobtot_y4_std" & qval < .25 & N.not.zero >= .15*202)  %>%
  dplyr::mutate(analysis = "externalizing problems at age 4 years")

maaslin_int4_results <- maaslin_int4_0.5p$results %>% 
  dplyr::filter(metadata=="cbclintprobtot_y4_std" & qval < .25 & N.not.zero >= .15*282)  %>% 
  dplyr::mutate(analysis = "internalizing problems at age 4 years")

maaslin_sleep4_results <- maaslin_sleep4_0.5p$results %>% 
  dplyr::filter(metadata=="cbclsleepprobtot_y4_std" & qval < .25 & N.not.zero >= .15*276)  %>% 
  dplyr::mutate(analysis = "sleep problems at age 4 years")

maaslin_dev4_results <- maaslin_dev4_0.5p$results %>%
  dplyr::filter(metadata=="cbcldevprobtot_y4_std" & qval < .25 & N.not.zero >= .15*289)  %>%
  dplyr::mutate(analysis = "dev problems at age 4 years")

maaslin_adhd4_results <- maaslin_adhd4_0.5p$results %>%
  dplyr::filter(metadata=="cbcladhdtot_y4_std" & qval < .25 & N.not.zero >= .15*202)  %>%
  dplyr::mutate(analysis = "adhd problems at age 4 years")

maaslin_.5_results <-
  adversity_results %>% 
  bind_rows(maaslin_tot2_results) %>% 
  bind_rows(maaslin_dev2_results) %>% 
  bind_rows(maaslin_int2_results) %>% 
  bind_rows(maaslin_tot4_results) %>% 
  bind_rows(maaslin_ext4_results) %>% 
  bind_rows(maaslin_int4_results) %>% 
  bind_rows(maaslin_sleep4_results) %>% 
  bind_rows(maaslin_dev4_results) %>% 
  bind_rows(maaslin_att4_results) %>% 
  bind_rows(maaslin_adhd4_results)

#Create new variables for graphing
#create new var: log2(exp(coeff))
maaslin_.5_results <-
  maaslin_.5_results %>% 
  dplyr::rename(taxa = feature) %>% 
  dplyr::mutate(
    log2FoldChange = sign(coef)*log(2*(exp(abs(coef)))),
    se_min = coef-stderr,
    se_max = coef+stderr,
    semin_log2fc = sign(se_min)*log(2*(exp(se_min))),
    semax_log2fc = sign(se_max)*log(2*(exp(se_max))),
    error_margin = qt(0.975, N.not.zero - 2, lower.tail = TRUE) * stderr, # manually calculate 95% confidence interval for a regression coefficient (t-value for alpha/2, df-2 x SE)
    lower_CI = coef - error_margin,
    upper_CI = coef + error_margin,
    lCI_log2fc = sign(lower_CI)*log(2*(exp(abs(lower_CI)))),
    uCI_log2fc = sign(upper_CI)*log(2*(exp(abs(upper_CI)))),
    fact_analysis = factor(analysis)
  )

#create new var with shorter taxa names
maaslin_.5_results <-
  maaslin_.5_results %>% 
  dplyr::mutate(
    Taxa = case_when(str_detect(taxa, "Intestinibacter") ~ "g__Intestinibacter",
                     str_detect(taxa, "sensu.stricto") ~ "g__Clostridium sensu stricto",
                     str_detect(taxa, "Lachnoclostridium.uncul") ~ "g__Lachnoclostridium",
                     str_detect(taxa, "Streptococcus") ~ "g__Streptococcus",
                     str_detect(taxa, "Parabacteroides") ~ "g__Parabacteroides",
                     str_detect(taxa, "Finegoldia") ~ "g__Finegoldia",
                     str_detect(taxa, "Ruminococcus") ~ "g__Ruminococcus",
                     str_detect(taxa, "Blautia") ~ "g__Blautia",
                     str_detect(taxa, "Peptoclostridium") ~ "g__Peptoclostridium",
                     str_detect(taxa, "Veillonella") ~ "g__Veillonella",
                     str_detect(taxa, "UCG") ~ "f__Lachnospiraceae",
                     str_detect(taxa, "Faecalibacterium") ~ "g__Faecalibacterium",
                     str_detect(taxa, "Coprobacillus") ~ "g__Coprobacillus"
    )
  )

#create more specific analysis variable that specifies the significant predictor in the models with all adversities together
maaslin_.5_results <- maaslin_.5_results %>% 
  dplyr::mutate(
    analysis = case_when(
      analysis=="all_adv" & name=="STAI_prorated_state_pw26" ~ "prenatal (all adversities in model)",
      analysis=="all_adv" & name=="postnatal_stress_syes" ~ "postnatal (all adversities in model)",
      analysis != "all_adv" ~ analysis
      )
  )

write_csv(maaslin_.5_results, "../../data/analysis_datasets/fran/maaslin2/final_results/all_maaslin_results_0.5p.csv")

Differential Abundance Dotplots

Figure 2: Differentially Abundant Taxa as a Function of Adversity Exposure.

maaslin_.5_results <- read_csv("../../data/analysis_datasets/fran/maaslin2/final_results/all_maaslin_results_0.5p.csv")
## Rows: 15 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): taxa, metadata, value, name, analysis, fact_analysis, Taxa
## dbl (16): coef, stderr, pval, qval, N, N.not.zero, log2FoldChange, se_min, s...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#create dotplot for adversity variables
ggplot(data=maaslin_.5_results %>% filter(str_detect(analysis, "preconception|prenatal|postnatal")), aes(x = Taxa, y = log2FoldChange, color=factor(analysis))) +
  geom_point(aes(color = factor(analysis)), size = 2, position=position_dodge(width=0.3)) +
  geom_errorbar(aes(ymin=lCI_log2fc, ymax=uCI_log2fc), width=0, position=position_dodge(width=0.3)) +
  scale_x_discrete(expand=c(0.2, 0)) +
  coord_flip() +
  ylim(-1.5, 1.5) + #needs to be ylim bc coordinates have been flipped
  scale_y_continuous(breaks=seq(-1.5, 1.5, by = 0.50)) +
  geom_hline(aes(yintercept = 0)) + 
  scale_color_discrete("Adversity Exposure") +
  theme(axis.text = element_text(size = 10)) +
  theme_minimal() 
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

#ggsave("../../manuscripts/fran_adversity_microbiome/figures/adversity_da_figure.png", height = 4, width = 8, bg = "white")

Figure 3: Differentially Abundant Taxa as a Function of Adversity-Correlated Child Socioemotional Functioning

#create dotplot w taxa for SEF variables
ggplot(data=maaslin_.5_results %>% filter(str_detect(name, "cbcl")), aes(x = Taxa, y = log2FoldChange, color=factor(analysis))) +
  geom_point(aes(color = factor(analysis)), size = 2, position=position_dodge(width=0.05)) +
  geom_errorbar(aes(ymin=lCI_log2fc, ymax=uCI_log2fc), width=0, position=position_dodge(width=0.05)) +
  scale_x_discrete(expand=c(0.2, 0)) +
  coord_flip() +
  ylim(-1.5, 1.5) + #needs to be ylim bc coordinates have been flipped
  scale_y_continuous(breaks=seq(-1.5, 1.5, by = 0.50)) + 
  geom_hline(aes(yintercept = 0)) + 
  scale_color_discrete("Socioemotional Functioning Domain") +
  theme(axis.text = element_text(size = 10)) +
  theme_minimal()
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

#ggsave("../../manuscripts/fran_adversity_microbiome/figures/SEF_da_figure.png", height = 4, width=7, bg="white")